From c387fc16d9a61c347f36d31dea7b155ea009ad50 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Roland=20H=C3=A4der?=
Date: Sun, 19 Nov 2023 17:38:45 +0100
Subject: [PATCH] Continued: - added command "fetch_relaylist" to fetch relays from relaylist.com

---
 fba/boot.py            |  7 +++++
 fba/commands.py        | 90 ++++++++++++++++++++++++++++++++++++++++++
 fba/http/federation.py |  2 +-
 3 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/fba/boot.py b/fba/boot.py
index 86411b7..dddc80f 100644
--- a/fba/boot.py
+++ b/fba/boot.py
@@ -250,6 +250,13 @@ def init_parser():
     parser.add_argument("--software", help="Name of software, e.g. 'lemmy'")
     parser.add_argument("--force", action="store_true", help="Forces update of data, no matter what.")
 
+    ### Fetches relay list from relaylist.com
+    parser = subparser_command.add_parser(
+        "fetch_relaylist",
+        help="Fetches relay list from relaylist.com",
+    )
+    parser.set_defaults(command=commands.fetch_relaylist)
+
     ### Remove invalid domains ###
     parser = subparser_command.add_parser(
         "remove_invalid",
diff --git a/fba/commands.py b/fba/commands.py
index e10ab32..b000e3d 100644
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -1599,6 +1599,96 @@ def fetch_instances_social(args: argparse.Namespace) -> int:
     logger.debug("Success! - EXIT!")
     return 0
 
+def fetch_relaylist(args: argparse.Namespace) -> int:
+    """
+    Fetches the relay list from api.relaylist.com and hands each remaining
+    relay domain to federation.fetch_instances().
+
+    Rows without an 'url' element and rows whose domain is empty, not wanted,
+    duplicated within this run, already registered or recently crawled are skipped.
+
+    Returns 0 on success, 1 when the API was accessed recently, 2 when the API
+    reported an error message, 3 when fetching raised an exception and 4 when the
+    response has no 'json' element.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    source_domain = "api.relaylist.com"
+
+    if sources.is_recent(source_domain):
+        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+        return 1
+    else:
+        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+        sources.update(source_domain)
+
+    logger.info("Fetching list from source_domain='%s' ...", source_domain)
+    fetched = network.get_json_api(
+        source_domain,
+        "/relays",
+        {},
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    )
+    logger.debug("fetched[]='%s'", type(fetched))
+
+    if "error_message" in fetched:
+        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
+        return 2
+    elif "exception" in fetched:
+        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
+        return 3
+    elif "json" not in fetched:
+        logger.warning("fetched has no element 'json' - EXIT!")
+        return 4
+
+    # Domains handled during this run, used to skip duplicate rows
+    domains = list()
+
+    logger.info("Checking %d row(s) ...", len(fetched["json"]))
+    for row in fetched["json"]:
+        logger.debug("row[]='%s'", type(row))
+        if not isinstance(row, dict) or "url" not in row:
+            logger.warning("row[]='%s' has no element 'url' - SKIPPED!", type(row))
+            continue
+
+        # Reduce the URL to its lower-cased host name without any port
+        domain = urlparse(row["url"]).netloc.lower().split(":")[0]
+
+        logger.debug("domain='%s' - AFTER!", domain)
+        # A URL without scheme has an empty netloc and cannot be crawled
+        if domain is None or domain == "":
+            logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
+            continue
+
+        logger.debug("domain='%s' - BEFORE!", domain)
+        domain = domain.encode("idna").decode("utf-8")
+        logger.debug("domain='%s' - AFTER!", domain)
+
+        if not domain_helper.is_wanted(domain):
+            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+            continue
+        elif domain in domains:
+            logger.debug("domain='%s' is already added - SKIPPED!", domain)
+            continue
+        elif instances.is_registered(domain):
+            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
+            continue
+        elif instances.is_recent(domain):
+            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+            continue
+
+        # federation.fetch_instances() raises ValueError when this command passes
+        # origin=None, so the list source is handed over as origin.
+        # NOTE(review): confirm source_domain is the intended origin value
+        logger.info("Fetching instances from domain='%s'", domain)
+        federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
+        domains.append(domain)
+
+    logger.debug("Success! - EXIT!")
+    return 0
+
 def fetch_relays(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
 
diff --git a/fba/http/federation.py b/fba/http/federation.py
index 50f66c3..6f7f033 100644
--- a/fba/http/federation.py
+++ b/fba/http/federation.py
@@ -56,7 +56,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path:
         raise ValueError(f"Parameter command[]='{type(command)}' is not of type 'str'")
     elif command == "":
         raise ValueError("Parameter 'command' is empty")
-    elif command in ["fetch_blocks", "fetch_cs", "fetch_bkali", "fetch_relays", "fetch_fedipact", "fetch_joinmobilizon", "fetch_joinmisskey", "fetch_joinfediverse"] and origin is None:
+    elif command in ["fetch_blocks", "fetch_cs", "fetch_bkali", "fetch_relays", "fetch_fedipact", "fetch_joinmobilizon", "fetch_joinmisskey", "fetch_joinfediverse", "fetch_relaylist"] and origin is None:
         raise ValueError(f"Parameter command='{command}' but origin is None, please fix invoking this function.")
     elif not isinstance(path, str) and path is not None:
         raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
-- 
2.39.5