From 4f79c281be1815eb4cbee7ddc0baabfd61f7ffe3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 21 Apr 2025 05:03:19 +0200 Subject: [PATCH] Continued: - added `srv.us` and `linodeusercontent.com` as mass-hosting/tunnel services, no real instances can be expected there - if table `instances` has no record for the given domain then `is_recent()` should return `False` - removed parameter `--single` from command `fetch_instances` and moved the SQL statement into the `else` block --- fba/boot.py | 1 - fba/commands.py | 51 +++++++++++----------------------------- fba/helpers/blacklist.py | 2 ++ fba/models/instances.py | 3 +++ 4 files changed, 19 insertions(+), 38 deletions(-) diff --git a/fba/boot.py b/fba/boot.py index 01bfe66..f25ea98 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -149,7 +149,6 @@ def init_parser() -> None: parser.set_defaults(command=commands.fetch_instances) parser.add_argument("--domain", help="Instance name (aka. domain) to fetch further instances from. Start with a large instance, for example mastodon.social .") parser.add_argument("--force-all", action="store_true", help="Include also already existing instances, otherwise only new are checked") - parser.add_argument("--single", action="store_true", help="Only fetch given instance.") parser.add_argument("--software", help="Name of software, for example 'lemmy'") ### Fetch blocks from static text file(s) ### diff --git a/fba/commands.py b/fba/commands.py index af28be6..d48a769 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -1047,36 +1047,9 @@ def fetch_instances(args: argparse.Namespace) -> int: # Fetch records database.cursor.execute("SELECT domain, origin, software FROM instances WHERE software = ? 
ORDER BY last_instance_fetch ASC", [software]) rows = database.cursor.fetchall() - - logger.info("Checking %d entries ...", len(rows)) - for row in rows: - logger.debug("row[domain]='%s',row[origin]='%s',row[software]='%s'", row["domain"], row["origin"], row["software"]) - if row["software"] is None: - logger.warning("row[domain]='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force-all to get it updated - SKIPPED!", row["domain"], row["domain"]) - continue - elif software_helper.is_relay(row["software"]): - logger.warning("row[domain]='%s' is a relay of type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"]) - continue - elif not args.force_all and not args.software in [None, ""] and instances.is_recent(row["domain"]): - logger.debug("row[domain]='%s' has recently been crawled - SKIPPED!", row["domain"]) - continue - - # Initial fetch - try: - logger.info("Fetching instances from row[domain]='%s',row[origin]='%s',row[software]='%s' ...", row["domain"], row["origin"], row["software"]) - federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name) - except network.exceptions as exception: - logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"]) - instances.set_last_error(row["domain"], exception) - instances.update(row["domain"]) - continue - - if args.single: - logger.debug("Not fetching more instances - EXIT!") - return 0 - - # Loop through some instances - database.cursor.execute("SELECT domain, origin, software \ + else: + # Loop through some instances + database.cursor.execute("SELECT domain, origin, software \ FROM instances \ WHERE software IN ( \ 'pleroma' , 'mastodon', 'friendica' , 'misskey' , 'lemmy' , \ @@ -1087,26 +1060,30 @@ WHERE software IN ( \ ) \ ORDER BY total_peers DESC, last_response_time ASC, last_updated 
ASC" ) + rows = database.cursor.fetchall() - rows = database.cursor.fetchall() logger.info("Checking %d entries ...", len(rows)) for row in rows: - logger.debug("row[domain]='%s'", row["domain"]) - - if not domain_helper.is_wanted(row["domain"]): - logger.warning("row[domain]='%s' is not wanted, you should execute remove_invalid for cleaning your database - SKIPPED!", row["domain"]) + logger.debug("row[domain]='%s',row[origin]='%s',row[software]='%s'", row["domain"], row["origin"], row["software"]) + if row["software"] is None: + logger.warning("row[domain]='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force-all to get it updated - SKIPPED!", row["domain"], row["domain"]) continue - elif instances.is_recent(row["domain"]): + elif software_helper.is_relay(row["software"]): + logger.warning("row[domain]='%s' is a relay of type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"]) + continue + elif not args.force_all and not args.software in [None, ""] and instances.is_recent(row["domain"]): logger.debug("row[domain]='%s' has recently been crawled - SKIPPED!", row["domain"]) continue + # Initial fetch try: - logger.info("Fetching instances for row[domain]='%s',origin='%s',software='%s' ...", row["domain"], row["origin"], row["software"]) + logger.info("Fetching instances from row[domain]='%s',row[origin]='%s',row[software]='%s' ...", row["domain"], row["origin"], row["software"]) federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name) except network.exceptions as exception: logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"]) instances.set_last_error(row["domain"], exception) instances.update(row["domain"]) + continue logger.debug("Success - EXIT!") return 0 diff --git a/fba/helpers/blacklist.py b/fba/helpers/blacklist.py 
index 42da424..5db28da 100644 --- a/fba/helpers/blacklist.py +++ b/fba/helpers/blacklist.py @@ -56,6 +56,7 @@ _blacklist = { "tunnel.silicon.moe" : "Testing/developing instances shouldn't be part of public instances", "7988276.xyz" : "Testing/developing instances shouldn't be part of public instances", "devtunnels.ms" : "Testing/developing instances shouldn't be part of public instances", + "srv.us" : "Testing/developing instances shouldn't be part of public instances", "serveo.net" : "Provides service 'expose local servers to the internet'", "hexbear.net" : "Is a Lemmy instance with malicious JavaScript code (shell commands)", "mastodon.n41.lat" : "Somehow this instance repeatedly causes an OOM here", @@ -82,6 +83,7 @@ _blacklist = { "drankdrankdrank" : "Mass flooding of instances", "cn24tv.it/page/" : "Useless massive pages", "youtube.com/channel/" : "Useless massive YT channels", + "linodeusercontent.com" : "Some user-content hoster?", } @lru_cache diff --git a/fba/models/instances.py b/fba/models/instances.py index 06e3283..a527038 100644 --- a/fba/models/instances.py +++ b/fba/models/instances.py @@ -367,6 +367,9 @@ def is_recent(domain: str, column: str = "last_instance_fetch") -> bool: # Fetch row row = database.cursor.fetchone() logger.debug("row[%s]='%s'", type(row), row) + if row is None: + logger.warning("domain='%s' has returned no instances record - EXIT!", domain) + return False fetched = float(row[column]) if row[column] is not None else 0.0 logger.debug("fetched[%s]=%f", type(fetched), fetched) -- 2.39.5