From 8786bcdf0a615e95989af5eedd3fce4ad153c503 Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 8 Apr 2017 08:12:14 +0000 Subject: [PATCH] Avoid duplicates with feeds and "remote self" --- include/feed.php | 9 +++++++-- include/items.php | 10 ++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/feed.php b/include/feed.php index 2959933703..2117676dba 100644 --- a/include/feed.php +++ b/include/feed.php @@ -200,7 +200,6 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { if ($item["plink"] == "") { $item["plink"] = $xpath->evaluate('rss:link/text()', $entry)->item(0)->nodeValue; } - $item["plink"] = original_url($item["plink"]); $item["uri"] = $xpath->evaluate('atom:id/text()', $entry)->item(0)->nodeValue; @@ -210,12 +209,17 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { if ($item["uri"] == "") { $item["uri"] = $item["plink"]; } + + $orig_plink = $item["plink"]; + + $item["plink"] = original_url($item["plink"]); + $item["parent-uri"] = $item["uri"]; if (!$simulate) { $r = q("SELECT `id` FROM `item` WHERE `uid` = %d AND `uri` = '%s' AND `network` IN ('%s', '%s')", intval($importer["uid"]), dbesc($item["uri"]), dbesc(NETWORK_FEED), dbesc(NETWORK_DFRN)); - if ($r) { + if (dbm::is_result($r)) { logger("Item with uri ".$item["uri"]." for user ".$importer["uid"]." already existed under id ".$r[0]["id"], LOGGER_DEBUG); continue; } @@ -340,6 +344,7 @@ function feed_import($xml,$importer,&$contact, &$hub, $simulate = false) { // Distributed items should have a well formatted URI. // Additionally we have to avoid conflicts with identical URI between imported feeds and these items. if ($notify) { + $item['guid'] = uri_to_guid($orig_plink, $a->get_hostname()); unset($item['uri']); unset($item['parent-uri']); } diff --git a/include/items.php b/include/items.php index c2b3d7d1f5..f3885d21f9 100644 --- a/include/items.php +++ b/include/items.php @@ -407,10 +407,12 @@ function item_store($arr,$force_parent = false, $notify = false, $dontcache = fa // We have to avoid duplicates. So we create the GUID in form of a hash of the plink or uri. // In difference to the call to "uri_to_guid" several lines below we add the hash of our own host. // This is done because our host is the original creator of the post. - if (isset($arr['plink'])) { - $arr['guid'] = uri_to_guid($arr['plink'], $a->get_hostname()); - } elseif (isset($arr['uri'])) { - $arr['guid'] = uri_to_guid($arr['uri'], $a->get_hostname()); + if (!isset($arr['guid'])) { + if (isset($arr['plink'])) { + $arr['guid'] = uri_to_guid($arr['plink'], $a->get_hostname()); + } elseif (isset($arr['uri'])) { + $arr['guid'] = uri_to_guid($arr['uri'], $a->get_hostname()); + } } } -- 2.39.5