From 3f65d0bb56308bfdeb6bb79eeb61a5f0db2fd0f2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Wed, 17 Apr 2024 00:26:14 +0200 Subject: [PATCH] Continued: - added some missing instances.is_recent(foo) invocations - use last_instance_fetch (default) wherever federation.fetch_instances() is invoked - wording fixed --- fba/commands.py | 69 ++++++++++++++++++++++++++---------------- fba/http/federation.py | 2 +- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index f1e7590..6dc322f 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -163,7 +163,7 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int: logger.debug("domain='%s' is already registered - SKIPPED!", domain) continue elif instances.is_recent(domain): - logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) + logger.debug("domain='%s' has recently been crawled - SKIPPED!", domain) continue logger.debug("Fetching instances from domain='%s' ...", domain) @@ -237,7 +237,7 @@ def fetch_bkali(args: argparse.Namespace) -> int: logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"]) continue elif instances.is_recent(entry["domain"]): - logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"]) + logger.debug("entry[domain]='%s' has recently been crawled - SKIPPED!", entry["domain"]) continue logger.debug("Adding domain='%s' ...", entry["domain"]) @@ -315,7 +315,7 @@ def fetch_blocks(args: argparse.Namespace) -> int: logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker) continue elif not args.force and instances.is_recent(blocker, "last_blocked"): - logger.debug("blocker='%s' has been recently accessed - SKIPPED!", blocker) + logger.debug("blocker='%s' has recently been accessed - SKIPPED!", blocker) continue logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker) @@ -430,10 +430,13 @@ def fetch_blocks(args: 
argparse.Namespace) -> int: logger.debug("block[blocked]='%s' - AFTER!", block["blocked"]) if not domain_helper.is_wanted(block["blocked"]): - logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"]) + logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"]) continue elif block["block_level"] in ["accept", "accepted"]: - logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"]) + logger.debug("block[blocked]='%s' is accepted, not wanted here - SKIPPED!", block["blocked"]) + continue + elif instances.is_recent(block["blocked"]): + logger.debug("block[blocked]='%s' has recently been crawled - SKIPPED!", block["blocked"]) continue elif not instances.is_registered(block["blocked"]): logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker) @@ -585,6 +588,9 @@ def fetch_observer(args: argparse.Namespace) -> int: elif instances.is_registered(domain): logger.debug("domain='%s' is already registered - SKIPPED!", domain) continue + elif instances.is_recent(domain): + logger.debug("domain='%s' has recently been crawled - SKIPPED!", domain) + continue logger.info("Fetching instances for domain='%s',software='%s' ...", domain, software) federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) @@ -645,14 +651,6 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int: for blocked in blockers: logger.debug("blocked='%s'", blocked) - if not instances.is_registered(blocked): - try: - logger.info("Fetching instances from domain='%s' ...", blocked) - federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name) - except network.exceptions as exception: - logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked) - instances.set_last_error(blocked, exception) - if not domain_helper.is_wanted(blocked): logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked) 
continue @@ -662,6 +660,14 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int: elif blocks.is_instance_blocked(blocker, blocked, block_level): logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level) continue + elif not instances.is_registered(blocked): + try: + logger.info("Fetching instances from domain='%s' ...", blocked) + federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name) + except network.exceptions as exception: + logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked) + instances.set_last_error(blocked, exception) + logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level) if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"): @@ -846,7 +852,7 @@ def fetch_fba_rss(args: argparse.Namespace) -> int: logger.debug("domain='%s' is already registered - SKIPPED!", domain) continue elif instances.is_recent(domain): - logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) + logger.debug("domain='%s' has recently been crawled - SKIPPED!", domain) continue logger.debug("Adding domain='%s'", domain) @@ -933,7 +939,7 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int: logger.debug("domain='%s' is already registered - SKIPPED!", domain) continue elif instances.is_recent(domain): - logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) + logger.debug("domain='%s' has recently been crawled - SKIPPED!", domain) continue logger.debug("Adding domain='%s',domains()=%d", domain, len(domains)) @@ -1000,7 +1006,7 @@ def fetch_instances(args: argparse.Namespace) -> int: logger.warning("row[domain]='%s' is of software type '%s' which is not supported by this command. 
Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"]) continue elif not args.force and not args.software in [None, ""]and instances.is_recent(row["domain"]): - logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"]) + logger.debug("row[domain]='%s' has recently been crawled - SKIPPED!", row["domain"]) continue # Initial fetch @@ -1022,8 +1028,7 @@ def fetch_instances(args: argparse.Namespace) -> int: "SELECT domain, origin, software \ FROM instances \ WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb', 'smithereen', 'vebinet', 'hugo', 'toki') \ -AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) \ -ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")] +ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC" ) rows = database.cursor.fetchall() @@ -1034,7 +1039,10 @@ ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time logger.debug("domain='%s' - AFTER!", domain) if not domain_helper.is_wanted(domain): - logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain) + logger.debug("domain='%s' is not wanted - SKIPPED!", domain) + continue + elif instances.is_recent(domain): + logger.debug("domain='%s' has recently been crawled - SKIPPED!") continue try: @@ -1190,7 +1198,7 @@ def fetch_fedipact(args: argparse.Namespace) -> int: logger.debug("domain='%s' is already registered - SKIPPED!", domain) continue elif instances.is_recent(domain): - logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) + logger.debug("domain='%s' has recently been crawled - SKIPPED!", domain) continue logger.info("Fetching domain='%s' ...", domain) @@ -1240,6 +1248,9 @@ def fetch_joinmobilizon(args: argparse.Namespace) -> int: elif 
instances.is_registered(row["host"]): logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"]) continue + elif instances.is_recent(row["host"]): + logger.debug("row[host]='%s' has recently been crawled - SKIPPED!", row["host"]) + continue logger.info("Fetching row[host]='%s' ...", row["host"]) federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name) @@ -1288,6 +1299,9 @@ def fetch_joinmisskey(args: argparse.Namespace) -> int: elif instances.is_registered(row["url"]): logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"]) continue + elif instances.is_recent(row["url"]): + logger.debug("row[url]='%s' has recently been crawled - SKIPPED!", row["url"]) + continue logger.info("Fetching row[url]='%s' ...", row["url"]) federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name) @@ -1319,7 +1333,7 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"]) continue elif (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"): - logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force)) + logger.debug("row[domain]='%s' has recently been checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force)) continue logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"]) @@ -1499,7 +1513,7 @@ def fetch_fedilist(args: argparse.Namespace) -> int: logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force)) continue elif instances.is_recent(domain): - logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) + logger.debug("domain='%s' has recently been crawled - SKIPPED!", domain) continue logger.info("Fetching instances from domain='%s' 
...", domain) @@ -1564,7 +1578,7 @@ def update_nodeinfo(args: argparse.Namespace) -> int: logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"]) continue elif not args.force and instances.is_recent(row["domain"], "last_nodeinfo"): - logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"]) + logger.debug("row[domain]='%s' has recently been checked - SKIPPED!", row["domain"]) continue try: @@ -1666,7 +1680,7 @@ def fetch_instances_social(args: argparse.Namespace) -> int: logger.debug("domain='%s' is already registered - SKIPPED!", domain) continue elif instances.is_recent(domain): - logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) + logger.debug("domain='%s' has recently been crawled - SKIPPED!", domain) continue logger.info("Fetching instances from domain='%s' ...", domain) @@ -1735,7 +1749,7 @@ def fetch_relaylist(args: argparse.Namespace) -> int: logger.debug("domain='%s' is already registered - SKIPPED!", domain) continue elif instances.is_recent(domain): - logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) + logger.debug("domain='%s' has recently been crawled - SKIPPED!", domain) continue logger.info("Fetching instances from domain='%s'", domain) @@ -1767,7 +1781,7 @@ def fetch_relays(args: argparse.Namespace) -> int: for row in rows: logger.debug("row[domain]='%s',row[software]='%s'", row["domain"], row["software"]) if not args.force and instances.is_recent(row["domain"]): - logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"]) + logger.debug("row[domain]='%s' has recently been fetched - SKIPPED!", row["domain"]) continue elif row["nodeinfo_url"] is None: logger.warning("row[domain]='%s' has empty nodeinfo_url but this is required - SKIPPED!", row["domain"]) @@ -1957,6 +1971,9 @@ def fetch_relays(args: argparse.Namespace) -> int: elif instances.is_registered(row["domain"]): logger.debug("row[domain]='%s' is already registered - SKIPPED!", 
row["domain"]) continue + elif instances.is_recent(row["domain"]): + logger.debug("row[domain]='%s' has recently been crawled - SKIPPED!", row["domain"]) + continue logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"]) federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name) diff --git a/fba/http/federation.py b/fba/http/federation.py index 481853d..aafef11 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -73,7 +73,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: raise ValueError(f"path='{path}' does not start with a slash") elif _DEPTH > 0 and instances.is_recent(domain, "last_instance_fetch"): raise ValueError(f"domain='{domain}' has recently been fetched but function was invoked") - elif software is None and not instances.is_recent(domain, "last_nodeinfo"): + elif software is None and not instances.is_recent(domain, "last_instance_fetch"): try: logger.debug("Software for domain='%s',path='%s' is not set, determining ...", domain, path) software = determine_software(domain, path) -- 2.39.5