From f2eec66240f674ac56ef57350606aab6cfda8aa8 Mon Sep 17 00:00:00 2001
From: Michael <heluecht@pirati.ca>
Date: Wed, 3 Aug 2022 03:38:03 +0000
Subject: [PATCH] Hopefully fixes loops during message processing

---
 src/Model/Contact.php                  |   2 +-
 src/Protocol/ActivityPub/Processor.php | 136 +++++++++++++++----------
 src/Protocol/ActivityPub/Queue.php     |  44 ++++++--
 src/Protocol/ActivityPub/Receiver.php  |  28 +++--
 src/Worker/Cron.php                    |   4 +-
 static/defaults.config.php             |   4 +
 6 files changed, 142 insertions(+), 76 deletions(-)

diff --git a/src/Model/Contact.php b/src/Model/Contact.php
index 2c1ca2d4ce..c873c3941e 100644
--- a/src/Model/Contact.php
+++ b/src/Model/Contact.php
@@ -2449,7 +2449,7 @@ class Contact
 
 		$new_pubkey = $ret['pubkey'] ?? '';
 
-		if ($uid == 0) {
+		if ($uid == 0 && DI::config()->get('system', 'fetch_featured_posts')) {
 			if ($ret['network'] == Protocol::ACTIVITYPUB) {
 				$apcontact = APContact::getByURL($ret['url'], false);
 				if (!empty($apcontact['featured'])) {
diff --git a/src/Protocol/ActivityPub/Processor.php b/src/Protocol/ActivityPub/Processor.php
index 653649aa2d..a409fec115 100644
--- a/src/Protocol/ActivityPub/Processor.php
+++ b/src/Protocol/ActivityPub/Processor.php
@@ -306,58 +306,13 @@ class Processor
 		}
 
 		if (empty($activity['directmessage']) && ($activity['id'] != $activity['reply-to-id']) && !Post::exists(['uri' => $activity['reply-to-id']])) {
-			if (self::hasJustBeenFetched($activity['reply-to-id'])) {
-				Logger::notice('We just have tried to fetch this activity. We don\'t try it again.', ['parent' => $activity['reply-to-id']]);
-				$fetch_by_worker = false;
-				if (empty($conversation)) {
-					return [];
-				}
-			} else {
-				$recursion_depth = $activity['recursion-depth'] ?? 0;
-				Logger::notice('Parent not found. Try to refetch it.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
-				if ($recursion_depth < DI::config()->get('system', 'max_recursion_depth')) {
-					$result = self::fetchMissingActivity($activity['reply-to-id'], $activity, '', Receiver::COMPLETION_AUTO);
-					$fetch_by_worker = empty($result);
-					if (empty($result) && self::isActivityGone($activity['reply-to-id'])) {
-						if (!empty($activity['entry-id'])) {
-							Queue::deleteById($activity['entry-id']);
-						}
-						if (empty($conversation)) {
-							return [];
-						}
-					}
-				} else {
-					Logger::notice('Recursion level is too high.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
-					$fetch_by_worker = true;
-				}
-			}
-
-			if ($fetch_by_worker && Queue::hasWorker($activity)) {
-				Logger::notice('There is already a worker task to fetch the post.', ['id' => $activity['id'], 'parent' => $activity['reply-to-id']]);
-				$fetch_by_worker = false;
-				if (empty($conversation)) {
-					return [];
-				}
-			}
-
-			if ($fetch_by_worker && DI::config()->get('system', 'fetch_by_worker')) {
-				Logger::notice('Fetching is done by worker.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
-				$activity['recursion-depth'] = 0;
-				if (!Fetch::hasWorker($activity['reply-to-id'])) {
-					Fetch::add($activity['reply-to-id']);
-					$wid = Worker::add(PRIORITY_HIGH, 'FetchMissingActivity', $activity['reply-to-id'], $activity, '', Receiver::COMPLETION_AUTO);
-					Fetch::setWorkerId($activity['reply-to-id'], $wid);
-					Queue::setWorkerId($activity, $wid);
-				} else {
-					Logger::debug('Activity will already be fetched via a worker.', ['url' => $activity['reply-to-id']]);
-				}
-				if (empty($conversation)) {
-					return [];
-				}
-			} elseif (!empty($result)) {
+			$result = self::fetchParent($activity);
+			if (!empty($result)) {
 				if (($item['thr-parent'] != $result) && Post::exists(['uri' => $result])) {
 					$item['thr-parent'] = $result;
 				}
+			} elseif (empty($conversation)) {
+				return [];
 			}
 		}
 
@@ -482,6 +437,77 @@ class Processor
 		return $item;
 	}
 
+	/**
+	 * Fetch and process parent posts for the given activity
+	 *
+	 * @param array $activity
+	 *
+	 * @return string
+	 */
+	private static function fetchParent(array $activity): string
+	{
+		if (self::hasJustBeenFetched($activity['reply-to-id'])) {
+			Logger::notice('We just have tried to fetch this activity. We don\'t try it again.', ['parent' => $activity['reply-to-id']]);
+			return '';
+		} 
+
+		$recursion_depth = $activity['recursion-depth'] ?? 0;
+
+		if ($recursion_depth < DI::config()->get('system', 'max_recursion_depth')) {
+			Logger::notice('Parent not found. Try to refetch it.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
+			$result = self::fetchMissingActivity($activity['reply-to-id'], $activity, '', Receiver::COMPLETION_AUTO);
+			if (empty($result) && self::isActivityGone($activity['reply-to-id'])) {
+				Logger::notice('The activity is gone, the queue entry will be deleted', ['parent' => $activity['reply-to-id']]);
+				if (!empty($activity['entry-id'])) {
+					Queue::deleteById($activity['entry-id']);
+				}
+				return '';
+			} elseif (!empty($result)) {
+				$exists = Post::exists(['uri' => [$result, $activity['reply-to-id']]]);
+				if ($exists) {
+					Logger::notice('The activity has been fetched and created.', ['parent' => $result]);
+					return $result;
+				} elseif (DI::config()->get('system', 'fetch_by_worker') || DI::config()->get('system', 'decoupled_receiver')) {
+					Logger::notice('The activity has been fetched and will hopefully be created later.', ['parent' => $result]);
+				} else {
+					Logger::notice('The activity exists but has not been created, the queue entry will be deleted.', ['parent' => $result]);
+					if (!empty($activity['entry-id'])) {
+						Queue::deleteById($activity['entry-id']);
+					}
+				}
+				return '';
+			}
+			if (empty($result) && !DI::config()->get('system', 'fetch_by_worker')) {
+				return '';
+			}
+		} elseif (self::isActivityGone($activity['reply-to-id'])) {
+			Logger::notice('The activity is gone. We will not spawn a worker. The queue entry will be deleted', ['parent' => $activity['reply-to-id']]);
+			if (!empty($activity['entry-id'])) {
+				Queue::deleteById($activity['entry-id']);
+			}
+			return '';
+		} else {
+			Logger::notice('Recursion level is too high.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
+		}
+
+		if (Queue::hasWorker($activity['worker-id'] ?? 0)) {
+			Logger::notice('There is already a worker task to fetch the post.', ['id' => $activity['id'], 'parent' => $activity['reply-to-id']]);
+			return '';
+		}
+
+		if (!Fetch::hasWorker($activity['reply-to-id'])) {
+			Logger::notice('Fetching is done by worker.', ['parent' => $activity['reply-to-id'], 'recursion-depth' => $recursion_depth]);
+			Fetch::add($activity['reply-to-id']);
+			$activity['recursion-depth'] = 0;
+			$wid = Worker::add(PRIORITY_HIGH, 'FetchMissingActivity', $activity['reply-to-id'], $activity, '', Receiver::COMPLETION_AUTO);
+			Fetch::setWorkerId($activity['reply-to-id'], $wid);
+		} else {
+			Logger::debug('Activity will already be fetched via a worker.', ['url' => $activity['reply-to-id']]);
+		}
+
+		return '';
+	}
+
 	/**
 	 * Check if a given activity has recently been fetched
 	 *
@@ -1022,7 +1048,7 @@ class Processor
 		Queue::remove($activity);
 
 		if ($success && Queue::hasChildren($item['uri'])) {
-			Worker::add(PRIORITY_HIGH, 'ProcessReplyByUri', $item['uri']);
+			Queue::processReplyByUri($item['uri']);
 		}
 
 		// Store send a follow request for every reshare - but only when the item had been stored
@@ -1366,9 +1392,13 @@ class Processor
 			return '';
 		}
 
-		ActivityPub\Receiver::processActivity($ldactivity, json_encode($activity), $uid, true, false, $signer);
-
-		Logger::notice('Activity had been fetched and processed.', ['url' => $url, 'object' => $activity['id']]);
+		if (($completion == Receiver::COMPLETION_RELAY) && Queue::exists($url, 'as:Create')) {
+			Logger::notice('Activity has already been queued.', ['url' => $url, 'object' => $activity['id']]);
+		} elseif (ActivityPub\Receiver::processActivity($ldactivity, json_encode($activity), $uid, true, false, $signer, '', $completion)) {
+			Logger::notice('Activity had been fetched and processed.', ['url' => $url, 'entry' => $child['entry-id'] ?? 0, 'completion' => $completion, 'object' => $activity['id']]);
+		} else {
+			Logger::notice('Activity had been fetched and will be processed later.', ['url' => $url, 'entry' => $child['entry-id'] ?? 0, 'completion' => $completion, 'object' => $activity['id']]);
+		}
 
 		return $activity['id'];
 	}
diff --git a/src/Protocol/ActivityPub/Queue.php b/src/Protocol/ActivityPub/Queue.php
index 321bd7f432..87d491bd99 100644
--- a/src/Protocol/ActivityPub/Queue.php
+++ b/src/Protocol/ActivityPub/Queue.php
@@ -84,6 +84,18 @@ class Queue
 		return $activity;
 	}
 
+	/**
+	 * Checks if an entryy for a given url and type already exists
+	 *
+	 * @param string $url
+	 * @param string $type
+	 * @return boolean
+	 */
+	public static function exists(string $url, string $type): bool
+	{
+		return DBA::exists('inbox-entry', ['type' => $type, 'object-id' => $url]);
+	}
+
 	/**
 	 * Remove activity from the queue
 	 *
@@ -132,30 +144,31 @@ class Queue
 	/**
 	 * Set the worker id for the queue entry
 	 *
-	 * @param array $activity
-	 * @param int   $wid
+	 * @param int $entryid
+	 * @param int $wid
 	 * @return void
 	 */
-	public static function setWorkerId(array $activity, int $wid)
+	public static function setWorkerId(int $entryid, int $wid)
 	{
-		if (empty($activity['entry-id']) || empty($wid)) {
+		if (empty($entryid) || empty($wid)) {
 			return;
 		}
-		DBA::update('inbox-entry', ['wid' => $wid], ['id' => $activity['entry-id']]);
+		DBA::update('inbox-entry', ['wid' => $wid], ['id' => $entryid]);
 	}
 
 	/**
 	 * Check if there is an assigned worker task
 	 *
-	 * @param array $activity
+	 * @param int $wid
+	 *
 	 * @return bool
 	 */
-	public static function hasWorker(array $activity = []): bool
+	public static function hasWorker(int $wid): bool
 	{
-		if (empty($activity['worker-id'])) {
+		if (empty($wid)) {
 			return false;
 		}
-		return DBA::exists('workerqueue', ['id' => $activity['worker-id'], 'done' => false]);
+		return DBA::exists('workerqueue', ['id' => $wid, 'done' => false]);
 	}
 
 	/**
@@ -172,6 +185,18 @@ class Queue
 			return false;
 		}
 
+		if (!empty($entry['wid'])) {
+			$worker = DI::app()->getQueue();
+			$wid = $worker['id'] ?? 0;
+			if ($entry['wid'] != $wid) {
+				$workerqueue = DBA::selectFirst('workerqueue', ['pid'], ['id' => $entry['wid'], 'done' => false]);
+				if (!empty($workerqueue['pid']) && posix_kill($workerqueue['pid'], 0)) {
+					Logger::notice('Entry is already processed via another process.', ['current' => $wid, 'processor' => $entry['wid']]);
+					return false;
+				}
+			}
+		}
+
 		Logger::debug('Processing queue entry', ['id' => $entry['id'], 'type' => $entry['type'], 'object-type' => $entry['object-type'], 'uri' => $entry['object-id'], 'in-reply-to' => $entry['in-reply-to-id']]);
 
 		$activity = json_decode($entry['activity'], true);
@@ -314,6 +339,5 @@ class Queue
 			}
 		}
 		DBA::close($entries);
-
 	}
 }
diff --git a/src/Protocol/ActivityPub/Receiver.php b/src/Protocol/ActivityPub/Receiver.php
index 1d337f28af..b3d67b2af7 100644
--- a/src/Protocol/ActivityPub/Receiver.php
+++ b/src/Protocol/ActivityPub/Receiver.php
@@ -507,26 +507,29 @@ class Receiver
 	 * @param boolean    $trust_source Do we trust the source?
 	 * @param boolean    $push         Message had been pushed to our system
 	 * @param array      $signer       The signer of the post
+	 *
+	 * @return bool
+	 *
 	 * @throws \Friendica\Network\HTTPException\InternalServerErrorException
 	 * @throws \ImagickException
 	 */
-	public static function processActivity(array $activity, string $body = '', int $uid = null, bool $trust_source = false, bool $push = false, array $signer = [], string $http_signer = '')
+	public static function processActivity(array $activity, string $body = '', int $uid = null, bool $trust_source = false, bool $push = false, array $signer = [], string $http_signer = '', int $completion = Receiver::COMPLETION_AUTO): bool
 	{
 		$type = JsonLD::fetchElement($activity, '@type');
 		if (!$type) {
 			Logger::info('Empty type', ['activity' => $activity]);
-			return;
+			return true;
 		}
 
 		if (!JsonLD::fetchElement($activity, 'as:object', '@id')) {
 			Logger::info('Empty object', ['activity' => $activity]);
-			return;
+			return true;
 		}
 
 		$actor = JsonLD::fetchElement($activity, 'as:actor', '@id');
 		if (empty($actor)) {
 			Logger::info('Empty actor', ['activity' => $activity]);
-			return;
+			return true;
 		}
 
 		if (is_array($activity['as:object'])) {
@@ -548,7 +551,7 @@ class Receiver
 		$object_data = self::prepareObjectData($activity, $uid, $push, $trust_source);
 		if (empty($object_data)) {
 			Logger::info('No object data found', ['activity' => $activity]);
-			return;
+			return true;
 		}
 
 		// Lemmy is announcing activities.
@@ -583,21 +586,27 @@ class Receiver
 			$object_data['object_activity']	= $activity;
 		}
 
+		if (($type == 'as:Create') && Queue::exists($object_data['object_id'], $type)) {
+			Logger::info('The activity is already added.', ['id' => $object_data['object_id']]);
+			return true;
+		}
+
 		if (DI::config()->get('system', 'decoupled_receiver') && ($trust_source || DI::config()->get('debug', 'ap_inbox_store_untrusted'))) {
 			$object_data = Queue::add($object_data, $type, $uid, $http_signer, $push, $trust_source);
 		}
 
 		if (!$trust_source) {
 			Logger::info('Activity trust could not be achieved.',  ['id' => $object_data['object_id'], 'type' => $type, 'signer' => $signer, 'actor' => $actor, 'attributedTo' => $attributed_to]);
-			return;
+			return true;
 		}
 
-		if (!empty($object_data['entry-id']) && DI::config()->get('system', 'decoupled_receiver') && ($push || ($activity['completion-mode'] == self::COMPLETION_RELAY))) {
+		if (!empty($object_data['entry-id']) && DI::config()->get('system', 'decoupled_receiver') && ($push || ($completion == self::COMPLETION_RELAY))) {
 			// We delay by 5 seconds to allow to accumulate all receivers
 			$delayed = date(DateTimeFormat::MYSQL, time() + 5);
 			Logger::debug('Initiate processing', ['id' => $object_data['entry-id'], 'uri' => $object_data['object_id']]);
-			Worker::add(['priority' => PRIORITY_HIGH, 'delayed' => $delayed], 'ProcessQueue', $object_data['entry-id']);
-			return;
+			$wid = Worker::add(['priority' => PRIORITY_HIGH, 'delayed' => $delayed], 'ProcessQueue', $object_data['entry-id']);
+			Queue::setWorkerId($object_data['entry-id'], $wid);
+			return false;
 		}
 
 		if (!empty($activity['recursion-depth'])) {
@@ -612,6 +621,7 @@ class Receiver
 			self::storeUnhandledActivity(true, $type, $object_data, $activity, $body, $uid, $trust_source, $push, $signer);
 			Queue::remove($object_data);
 		}
+		return true;
 	}
 
 	/**
diff --git a/src/Worker/Cron.php b/src/Worker/Cron.php
index 2c724366da..12ea6e60a3 100644
--- a/src/Worker/Cron.php
+++ b/src/Worker/Cron.php
@@ -93,9 +93,7 @@ class Cron
 			Queue::clear();
 
 			// Process all unprocessed entries
-			if (DI::config()->get('system', 'decoupled_receiver')) {
-				Queue::processAll();
-			}
+			Queue::processAll();
 
 			// Search for new contacts in the directory
 			if (DI::config()->get('system', 'synchronize_directory')) {
diff --git a/static/defaults.config.php b/static/defaults.config.php
index ee15ce826d..44c80b7b0c 100644
--- a/static/defaults.config.php
+++ b/static/defaults.config.php
@@ -286,6 +286,10 @@ return [
 		// Fetch missing posts via a background process
 		'fetch_by_worker' => false,
 
+		// fetch_featured_posts (Boolean)
+		// Fetch featured posts from all contacts
+		'fetch_featured_posts' => false,
+
 		// free_crawls (Integer)
 		// Number of "free" searches when system => permit_crawling is enabled.
 		'free_crawls' => 10,
-- 
2.39.5