* @return array Returns the header and the first item in dry run mode
* @throws \Friendica\Network\HTTPException\InternalServerErrorException
*/
- public static function import($xml, array $importer = [], array $contact = [])
+ public static function import(string $xml, array $importer = [], array $contact = []): array
{
$dryRun = empty($importer) && empty($contact);
@$doc->loadXML($xml);
$xpath = new DOMXPath($doc);
$xpath->registerNamespace('atom', ActivityNamespace::ATOM1);
- $xpath->registerNamespace('dc', "http://purl.org/dc/elements/1.1/");
- $xpath->registerNamespace('content', "http://purl.org/rss/1.0/modules/content/");
- $xpath->registerNamespace('rdf', "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
- $xpath->registerNamespace('rss', "http://purl.org/rss/1.0/");
- $xpath->registerNamespace('media', "http://search.yahoo.com/mrss/");
+ $xpath->registerNamespace('dc', 'http://purl.org/dc/elements/1.1/');
+ $xpath->registerNamespace('content', 'http://purl.org/rss/1.0/modules/content/');
+ $xpath->registerNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#');
+ $xpath->registerNamespace('rss', 'http://purl.org/rss/1.0/');
+ $xpath->registerNamespace('media', 'http://search.yahoo.com/mrss/');
$xpath->registerNamespace('poco', ActivityNamespace::POCO);
$author = [];
// Is it RDF?
if ($xpath->query('/rdf:RDF/rss:channel')->length > 0) {
- $author["author-link"] = XML::getFirstNodeValue($xpath, '/rdf:RDF/rss:channel/rss:link/text()');
- $author["author-name"] = XML::getFirstNodeValue($xpath, '/rdf:RDF/rss:channel/rss:title/text()');
+ $author['author-link'] = XML::getFirstNodeValue($xpath, '/rdf:RDF/rss:channel/rss:link/text()');
+ $author['author-name'] = XML::getFirstNodeValue($xpath, '/rdf:RDF/rss:channel/rss:title/text()');
- if (empty($author["author-name"])) {
- $author["author-name"] = XML::getFirstNodeValue($xpath, '/rdf:RDF/rss:channel/rss:description/text()');
+ if (empty($author['author-name'])) {
+ $author['author-name'] = XML::getFirstNodeValue($xpath, '/rdf:RDF/rss:channel/rss:description/text()');
}
$entries = $xpath->query('/rdf:RDF/rss:item');
}
$alternate = XML::getFirstAttributes($xpath, "atom:link[@rel='alternate']");
if (is_object($alternate)) {
foreach ($alternate as $attribute) {
- if ($attribute->name == "href") {
- $author["author-link"] = $attribute->textContent;
+ if ($attribute->name == 'href') {
+ $author['author-link'] = $attribute->textContent;
}
}
}
- if (empty($author["author-link"])) {
+ if (empty($author['author-link'])) {
$self = XML::getFirstAttributes($xpath, "atom:link[@rel='self']");
if (is_object($self)) {
foreach ($self as $attribute) {
- if ($attribute->name == "href") {
- $author["author-link"] = $attribute->textContent;
+ if ($attribute->name == 'href') {
+ $author['author-link'] = $attribute->textContent;
}
}
}
}
- if (empty($author["author-link"])) {
- $author["author-link"] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:id/text()');
+ if (empty($author['author-link'])) {
+ $author['author-link'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:id/text()');
}
- $author["author-avatar"] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:logo/text()');
+ $author['author-avatar'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:logo/text()');
- $author["author-name"] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:title/text()');
+ $author['author-name'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:title/text()');
- if (empty($author["author-name"])) {
- $author["author-name"] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:subtitle/text()');
+ if (empty($author['author-name'])) {
+ $author['author-name'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:subtitle/text()');
}
- if (empty($author["author-name"])) {
- $author["author-name"] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:author/atom:name/text()');
+ if (empty($author['author-name'])) {
+ $author['author-name'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:author/atom:name/text()');
}
$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:displayName/text()');
- if ($value != "") {
- $author["author-name"] = $value;
+ if ($value != '') {
+ $author['author-name'] = $value;
}
if ($dryRun) {
- $author["author-id"] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:author/atom:id/text()');
+ $author['author-id'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:author/atom:id/text()');
// See https://tools.ietf.org/html/rfc4287#section-3.2.2
$value = XML::getFirstNodeValue($xpath, 'atom:author/atom:uri/text()');
- if ($value != "") {
- $author["author-link"] = $value;
+ if ($value != '') {
+ $author['author-link'] = $value;
}
$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:preferredUsername/text()');
- if ($value != "") {
- $author["author-nick"] = $value;
+ if ($value != '') {
+ $author['author-nick'] = $value;
}
$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:address/poco:formatted/text()');
- if ($value != "") {
- $author["author-location"] = $value;
+ if ($value != '') {
+ $author['author-location'] = $value;
}
$value = XML::getFirstNodeValue($xpath, 'atom:author/poco:note/text()');
- if ($value != "") {
- $author["author-about"] = $value;
+ if ($value != '') {
+ $author['author-about'] = $value;
}
$avatar = XML::getFirstAttributes($xpath, "atom:author/atom:link[@rel='avatar']");
if (is_object($avatar)) {
foreach ($avatar as $attribute) {
- if ($attribute->name == "href") {
- $author["author-avatar"] = $attribute->textContent;
+ if ($attribute->name == 'href') {
+ $author['author-avatar'] = $attribute->textContent;
}
}
}
}
- $author["edited"] = $author["created"] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:updated/text()');
+ $author['edited'] = $author['created'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:updated/text()');
- $author["app"] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:generator/text()');
+ $author['app'] = XML::getFirstNodeValue($xpath, '/atom:feed/atom:generator/text()');
$entries = $xpath->query('/atom:feed/atom:entry');
}
// Is it RSS?
if ($xpath->query('/rss/channel')->length > 0) {
- $author["author-link"] = XML::getFirstNodeValue($xpath, '/rss/channel/link/text()');
+ $author['author-link'] = XML::getFirstNodeValue($xpath, '/rss/channel/link/text()');
- $author["author-name"] = XML::getFirstNodeValue($xpath, '/rss/channel/title/text()');
+ $author['author-name'] = XML::getFirstNodeValue($xpath, '/rss/channel/title/text()');
- if (empty($author["author-name"])) {
- $author["author-name"] = XML::getFirstNodeValue($xpath, '/rss/channel/copyright/text()');
+ if (empty($author['author-name'])) {
+ $author['author-name'] = XML::getFirstNodeValue($xpath, '/rss/channel/copyright/text()');
}
- if (empty($author["author-name"])) {
- $author["author-name"] = XML::getFirstNodeValue($xpath, '/rss/channel/description/text()');
+ if (empty($author['author-name'])) {
+ $author['author-name'] = XML::getFirstNodeValue($xpath, '/rss/channel/description/text()');
}
- $author["author-avatar"] = XML::getFirstNodeValue($xpath, '/rss/channel/image/url/text()');
+ $author['author-avatar'] = XML::getFirstNodeValue($xpath, '/rss/channel/image/url/text()');
- if (empty($author["author-avatar"])) {
- $avatar = XML::getFirstAttributes($xpath, "/rss/channel/itunes:image");
+ if (empty($author['author-avatar'])) {
+ $avatar = XML::getFirstAttributes($xpath, '/rss/channel/itunes:image');
if (is_object($avatar)) {
foreach ($avatar as $attribute) {
- if ($attribute->name == "href") {
- $author["author-avatar"] = $attribute->textContent;
+ if ($attribute->name == 'href') {
+ $author['author-avatar'] = $attribute->textContent;
}
}
}
}
- $author["author-about"] = HTML::toBBCode(XML::getFirstNodeValue($xpath, '/rss/channel/description/text()'), $basepath);
+ $author['author-about'] = HTML::toBBCode(XML::getFirstNodeValue($xpath, '/rss/channel/description/text()'), $basepath);
- if (empty($author["author-about"])) {
- $author["author-about"] = XML::getFirstNodeValue($xpath, '/rss/channel/itunes:summary/text()');
+ if (empty($author['author-about'])) {
+ $author['author-about'] = XML::getFirstNodeValue($xpath, '/rss/channel/itunes:summary/text()');
}
- $author["edited"] = $author["created"] = XML::getFirstNodeValue($xpath, '/rss/channel/pubDate/text()');
+ $author['edited'] = $author['created'] = XML::getFirstNodeValue($xpath, '/rss/channel/pubDate/text()');
- $author["app"] = XML::getFirstNodeValue($xpath, '/rss/channel/generator/text()');
+ $author['app'] = XML::getFirstNodeValue($xpath, '/rss/channel/generator/text()');
$entries = $xpath->query('/rss/channel/item');
}
if (!$dryRun) {
- $author["author-link"] = $contact["url"];
+ $author['author-link'] = $contact['url'];
- if (empty($author["author-name"])) {
- $author["author-name"] = $contact["name"];
+ if (empty($author['author-name'])) {
+ $author['author-name'] = $contact['name'];
}
- $author["author-avatar"] = $contact["thumb"];
+ $author['author-avatar'] = $contact['thumb'];
- $author["owner-link"] = $contact["url"];
- $author["owner-name"] = $contact["name"];
- $author["owner-avatar"] = $contact["thumb"];
+ $author['owner-link'] = $contact['url'];
+ $author['owner-name'] = $contact['name'];
+ $author['owner-avatar'] = $contact['thumb'];
}
$header = [];
- $header["uid"] = $importer["uid"] ?? 0;
- $header["network"] = Protocol::FEED;
- $header["wall"] = 0;
- $header["origin"] = 0;
- $header["gravity"] = GRAVITY_PARENT;
- $header["private"] = Item::PUBLIC;
- $header["verb"] = Activity::POST;
- $header["object-type"] = Activity\ObjectType::NOTE;
- $header["post-type"] = Item::PT_ARTICLE;
-
- $header["contact-id"] = $contact["id"] ?? 0;
+ $header['uid'] = $importer['uid'] ?? 0;
+ $header['network'] = Protocol::FEED;
+ $header['wall'] = 0;
+ $header['origin'] = 0;
+ $header['gravity'] = GRAVITY_PARENT;
+ $header['private'] = Item::PUBLIC;
+ $header['verb'] = Activity::POST;
+ $header['object-type'] = Activity\ObjectType::NOTE;
+ $header['post-type'] = Item::PT_ARTICLE;
+
+ $header['contact-id'] = $contact['id'] ?? 0;
if (!is_object($entries)) {
Logger::info("There are no entries in this feed.");
$alternate = XML::getFirstAttributes($xpath, "atom:link[@rel='alternate']", $entry);
if (!is_object($alternate)) {
- $alternate = XML::getFirstAttributes($xpath, "atom:link", $entry);
+ $alternate = XML::getFirstAttributes($xpath, 'atom:link', $entry);
}
if (is_object($alternate)) {
foreach ($alternate as $attribute) {
- if ($attribute->name == "href") {
- $item["plink"] = $attribute->textContent;
+ if ($attribute->name == 'href') {
+ $item['plink'] = $attribute->textContent;
}
}
}
- if (empty($item["plink"])) {
- $item["plink"] = XML::getFirstNodeValue($xpath, 'link/text()', $entry);
+ if (empty($item['plink'])) {
+ $item['plink'] = XML::getFirstNodeValue($xpath, 'link/text()', $entry);
}
- if (empty($item["plink"])) {
- $item["plink"] = XML::getFirstNodeValue($xpath, 'rss:link/text()', $entry);
+ if (empty($item['plink'])) {
+ $item['plink'] = XML::getFirstNodeValue($xpath, 'rss:link/text()', $entry);
}
// Add the base path if missing
- $item["plink"] = Network::addBasePath($item["plink"], $basepath);
+ $item['plink'] = Network::addBasePath($item['plink'], $basepath);
- $item["uri"] = XML::getFirstNodeValue($xpath, 'atom:id/text()', $entry);
+ $item['uri'] = XML::getFirstNodeValue($xpath, 'atom:id/text()', $entry);
$guid = XML::getFirstNodeValue($xpath, 'guid/text()', $entry);
if (!empty($guid)) {
- $item["uri"] = $guid;
+ $item['uri'] = $guid;
// Don't use the GUID value directly but instead use it as a basis for the GUID
- $item["guid"] = Item::guidFromUri($guid, parse_url($guid, PHP_URL_HOST) ?? parse_url($item["plink"], PHP_URL_HOST));
+ $item['guid'] = Item::guidFromUri($guid, parse_url($guid, PHP_URL_HOST) ?? parse_url($item['plink'], PHP_URL_HOST));
}
- if (empty($item["uri"])) {
- $item["uri"] = $item["plink"];
+ if (empty($item['uri'])) {
+ $item['uri'] = $item['plink'];
}
- $orig_plink = $item["plink"];
+ $orig_plink = $item['plink'];
try {
- $item["plink"] = DI::httpClient()->finalUrl($item["plink"]);
+ $item['plink'] = DI::httpClient()->finalUrl($item['plink']);
} catch (TransferException $exception) {
- Logger::notice('Item URL couldn\'t get expanded', ['url' => $item["plink"], 'exception' => $exception]);
+ Logger::notice('Item URL couldn\'t get expanded', ['url' => $item['plink'], 'exception' => $exception]);
}
- $item["title"] = XML::getFirstNodeValue($xpath, 'atom:title/text()', $entry);
+ $item['title'] = XML::getFirstNodeValue($xpath, 'atom:title/text()', $entry);
- if (empty($item["title"])) {
- $item["title"] = XML::getFirstNodeValue($xpath, 'title/text()', $entry);
+ if (empty($item['title'])) {
+ $item['title'] = XML::getFirstNodeValue($xpath, 'title/text()', $entry);
}
- if (empty($item["title"])) {
- $item["title"] = XML::getFirstNodeValue($xpath, 'rss:title/text()', $entry);
+ if (empty($item['title'])) {
+ $item['title'] = XML::getFirstNodeValue($xpath, 'rss:title/text()', $entry);
}
- if (empty($item["title"])) {
- $item["title"] = XML::getFirstNodeValue($xpath, 'itunes:title/text()', $entry);
+ if (empty($item['title'])) {
+ $item['title'] = XML::getFirstNodeValue($xpath, 'itunes:title/text()', $entry);
}
- $item["title"] = html_entity_decode($item["title"], ENT_QUOTES, 'UTF-8');
+ $item['title'] = html_entity_decode($item['title'], ENT_QUOTES, 'UTF-8');
$published = XML::getFirstNodeValue($xpath, 'atom:published/text()', $entry);
$published = $updated;
}
- if ($published != "") {
- $item["created"] = $published;
+ if ($published != '') {
+ $item['created'] = $published;
}
- if ($updated != "") {
- $item["edited"] = $updated;
+ if ($updated != '') {
+ $item['edited'] = $updated;
}
if (!$dryRun) {
$condition = ["`uid` = ? AND `uri` = ? AND `network` IN (?, ?)",
- $importer["uid"], $item["uri"], Protocol::FEED, Protocol::DFRN];
+ $importer['uid'], $item['uri'], Protocol::FEED, Protocol::DFRN];
$previous = Post::selectFirst(['id', 'created'], $condition);
if (DBA::isResult($previous)) {
// Use the creation date from when the post was stored, because this date can change in the feed.
$creator = XML::getFirstNodeValue($xpath, 'dc:creator/text()', $entry);
}
- if ($creator != "") {
- $item["author-name"] = $creator;
+ if ($creator != '') {
+ $item['author-name'] = $creator;
}
$creator = XML::getFirstNodeValue($xpath, 'dc:creator/text()', $entry);
- if ($creator != "") {
- $item["author-name"] = $creator;
+ if ($creator != '') {
+ $item['author-name'] = $creator;
}
/// @TODO ?
$enclosures = $xpath->query("enclosure|atom:link[@rel='enclosure']", $entry);
foreach ($enclosures as $enclosure) {
- $href = "";
+ $href = '';
$length = null;
$type = null;
foreach ($enclosure->attributes as $attribute) {
- if (in_array($attribute->name, ["url", "href"])) {
+ if (in_array($attribute->name, ['url', 'href'])) {
$href = $attribute->textContent;
- } elseif ($attribute->name == "length") {
+ } elseif ($attribute->name == 'length') {
$length = (int)$attribute->textContent;
- } elseif ($attribute->name == "type") {
+ } elseif ($attribute->name == 'type') {
$type = $attribute->textContent;
}
}
}
$taglist = [];
- $categories = $xpath->query("category", $entry);
+ $categories = $xpath->query('category', $entry);
foreach ($categories as $category) {
$taglist[] = $category->nodeValue;
}
// Remove the content of the title if it is identical to the body.
// This helps with auto-generated titles, e.g. from Tumblr.
- if (self::titleIsBody($item["title"], $body)) {
- $item["title"] = "";
+ if (self::titleIsBody($item['title'], $body)) {
+ $item['title'] = '';
}
- $item["body"] = HTML::toBBCode($body, $basepath);
+ $item['body'] = HTML::toBBCode($body, $basepath);
// Remove tracking pixels
- $item["body"] = preg_replace("/\[img=1x1\]([^\[\]]*)\[\/img\]/Usi", '', $item["body"]);
+ $item['body'] = preg_replace("/\[img=1x1\]([^\[\]]*)\[\/img\]/Usi", '', $item['body']);
- if (($item["body"] == '') && ($item["title"] != '')) {
- $item["body"] = $item["title"];
- $item["title"] = '';
+ if (($item['body'] == '') && ($item['title'] != '')) {
+ $item['body'] = $item['title'];
+ $item['title'] = '';
}
if ($dryRun) {
}
$preview = '';
- if (!empty($contact["fetch_further_information"]) && ($contact["fetch_further_information"] < 3)) {
+ if (!empty($contact['fetch_further_information']) && ($contact['fetch_further_information'] < 3)) {
// Handle enclosures and treat them as preview picture
foreach ($attachments as $attachment) {
- if ($attachment["mimetype"] == "image/jpeg") {
- $preview = $attachment["url"];
+ if ($attachment['mimetype'] == 'image/jpeg') {
+ $preview = $attachment['url'];
}
}
// Remove a possible link to the item itself
- $item["body"] = str_replace($item["plink"], '', $item["body"]);
- $item["body"] = trim(preg_replace('/\[url\=\](\w+.*?)\[\/url\]/i', '', $item["body"]));
+ $item['body'] = str_replace($item['plink'], '', $item['body']);
+ $item['body'] = trim(preg_replace('/\[url\=\](\w+.*?)\[\/url\]/i', '', $item['body']));
// Replace the content when the title is longer than the body
- $replace = (strlen($item["title"]) > strlen($item["body"]));
+ $replace = (strlen($item['title']) > strlen($item['body']));
// Replace it, when there is an image in the body
- if (strstr($item["body"], '[/img]')) {
+ if (strstr($item['body'], '[/img]')) {
$replace = true;
}
// Replace it, when there is a link in the body
- if (strstr($item["body"], '[/url]')) {
+ if (strstr($item['body'], '[/url]')) {
$replace = true;
}
- $saved_body = $item["body"];
- $saved_title = $item["title"];
+ $saved_body = $item['body'];
+ $saved_title = $item['title'];
if ($replace) {
- $item["body"] = trim($item["title"]);
+ $item['body'] = trim($item['title']);
}
$data = ParseUrl::getSiteinfoCached($item['plink']);
}
}
- $data = PageInfo::queryUrl($item["plink"], false, $preview, ($contact["fetch_further_information"] == 2), $contact["ffi_keyword_denylist"] ?? '');
+ $data = PageInfo::queryUrl($item['plink'], false, $preview, ($contact['fetch_further_information'] == 2), $contact['ffi_keyword_denylist'] ?? '');
if (!empty($data)) {
// Take the data that was provided by the feed if the query is empty
if (($data['type'] == 'link') && empty($data['title']) && empty($data['text'])) {
$data['title'] = $saved_title;
- $item["body"] = $saved_body;
+ $item['body'] = $saved_body;
}
$data_text = strip_tags(trim($data['text'] ?? ''));
}
// We always strip the title since it will be added in the page information
- $item["title"] = "";
- $item["body"] = $item["body"] . "\n" . PageInfo::getFooterFromData($data, false);
- $taglist = $contact["fetch_further_information"] == 2 ? PageInfo::getTagsFromUrl($item["plink"], $preview, $contact["ffi_keyword_denylist"] ?? '') : [];
- $item["object-type"] = Activity\ObjectType::BOOKMARK;
+ $item['title'] = '';
+ $item['body'] = $item['body'] . "\n" . PageInfo::getFooterFromData($data, false);
+ $taglist = $contact['fetch_further_information'] == 2 ? PageInfo::getTagsFromUrl($item['plink'], $preview, $contact['ffi_keyword_denylist'] ?? '') : [];
+ $item['object-type'] = Activity\ObjectType::BOOKMARK;
$attachments = [];
foreach (['audio', 'video'] as $elementname) {
}
} else {
if (!empty($summary)) {
- $item["body"] = '[abstract]' . HTML::toBBCode($summary, $basepath) . "[/abstract]\n" . $item["body"];
+ $item['body'] = '[abstract]' . HTML::toBBCode($summary, $basepath) . "[/abstract]\n" . $item['body'];
}
- if (!empty($contact["fetch_further_information"]) && ($contact["fetch_further_information"] == 3)) {
+ if (!empty($contact['fetch_further_information']) && ($contact['fetch_further_information'] == 3)) {
if (empty($taglist)) {
- $taglist = PageInfo::getTagsFromUrl($item["plink"], $preview, $contact["ffi_keyword_denylist"] ?? '');
+ $taglist = PageInfo::getTagsFromUrl($item['plink'], $preview, $contact['ffi_keyword_denylist'] ?? '');
}
- $item["body"] .= "\n" . self::tagToString($taglist);
+ $item['body'] .= "\n" . self::tagToString($taglist);
} else {
$taglist = [];
}
// Add the link to the original feed entry if not present in feed
- if (($item['plink'] != '') && !strstr($item["body"], $item['plink']) && !in_array($item['plink'], array_column($attachments, 'url'))) {
- $item["body"] .= "[hr][url]" . $item['plink'] . "[/url]";
+ if (($item['plink'] != '') && !strstr($item['body'], $item['plink']) && !in_array($item['plink'], array_column($attachments, 'url'))) {
+ $item['body'] .= '[hr][url]' . $item['plink'] . '[/url]';
}
}
}
$condition = ['uid' => $item['uid'], 'uri' => $item['uri']];
- if (!Post::exists($condition) && !Post\Delayed::exists($item["uri"], $item['uid'])) {
+ if (!Post::exists($condition) && !Post\Delayed::exists($item['uri'], $item['uid'])) {
if (!$notify) {
Post\Delayed::publish($item, $notify, $taglist, $attachments);
} else {
'taglist' => $taglist, 'attachments' => $attachments];
}
} else {
- Logger::info('Post already created or exists in the delayed posts queue', ['uid' => $item['uid'], 'uri' => $item["uri"]]);
+ Logger::info('Post already created or exists in the delayed posts queue', ['uid' => $item['uid'], 'uri' => $item['uri']]);
}
}
self::adjustPollFrequency($contact, $creation_dates);
}
- return ["header" => $author, "items" => $items];
+ return ['header' => $author, 'items' => $items];
}
/**