From 453aa5dd2cb9cef9cd5a4e08bcc211d012809b52 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Roland=20H=C3=A4der?=
Date: Wed, 21 Jun 2023 15:36:10 +0200
Subject: [PATCH] Continued:

- added command fetch_fedipact(), which may gather some instances (ones that
  are happy to block)
- a warning was too noisy for missing keys
---
 fba/boot.py            |  7 ++++++
 fba/commands.py        | 50 +++++++++++++++++++++++++++++++++++++++---
 fba/http/federation.py |  2 +-
 3 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/fba/boot.py b/fba/boot.py
index 0e083b8..aed135a 100644
--- a/fba/boot.py
+++ b/fba/boot.py
@@ -122,6 +122,13 @@ def init_parser():
     )
     parser.set_defaults(command=commands.fetch_observer)
 
+    ### Fetch instances from fedipact.online ###
+    parser = subparser_command.add_parser(
+        "fetch_fedipact",
+        help="Fetches blocks from fedipact.online.",
+    )
+    parser.set_defaults(command=commands.fetch_fedipact)
+
     logger.debug("init_parser(): EXIT!")
 
 def run_command():
diff --git a/fba/commands.py b/fba/commands.py
index a55e8a3..ab82b24 100644
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -671,7 +671,7 @@ def fetch_oliphant(args: argparse.Namespace):
         logger.info(f"Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
         response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
-        logger.debug("response[]='%s'", type(response))
+        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
         if response.ok and response.content != "":
             logger.debug(f"Fetched {len(response.content)} Bytes, parsing CSV ...")
             reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
@@ -724,8 +724,8 @@ def fetch_txt(args: argparse.Namespace):
         logger.debug("Fetching url='%s' ...", url)
         response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
-        logger.debug("response[]='%s'", type(response))
-        if response.ok and response.text != "":
+        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+        if response.ok and response.status_code < 300 and response.text != "":
             logger.debug(f"Returned {len(response.text.strip())} Bytes for processing")
             domains = response.text.split("\n")
 
@@ -757,3 +757,47 @@ def fetch_txt(args: argparse.Namespace):
                 continue
 
     logger.debug("EXIT!")
+
+def fetch_fedipact(args: argparse.Namespace):
+    logger.debug("args[]='%s' - CALLED!", type(args))
+    locking.acquire()
+
+    response = fba.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+
+    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+    if response.ok and response.status_code < 300 and response.text != "":
+        logger.debug("Parsing %d Bytes ...", len(response.text))
+
+        doc = bs4.BeautifulSoup(response.text, "html.parser")
+        logger.debug("doc[]='%s'", type(doc))
+
+        rows = doc.findAll("li")
+        logger.info("Checking %d row(s) ...", len(rows))
+        for row in rows:
+            logger.debug("row[]='%s'", type(row))
+            domain = tidyup.domain(row.contents[0])
+
+            logger.debug("domain='%s'", domain)
+            if domain == "":
+                logger.debug("domain is empty - SKIPPED!")
+                continue
+            elif not validators.domain(domain):
+                logger.warning("domain='%s' is not a valid domain name - SKIPPED!", domain)
+                continue
+            elif domain.endswith(".arpa"):
+                logger.debug("domain='%s' is a domain for reversed IP addresses - SKIPPED!", domain)
+                continue
+            elif domain.endswith(".tld"):
+                logger.debug("domain='%s' is a fake domain - SKIPPED!", domain)
+                continue
+            elif blacklist.is_blacklisted(domain):
+                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
+                continue
+            elif instances.is_registered(domain):
+                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
+                continue
+
+            logger.info("Fetching domain='%s' ...", domain)
+            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
+
+    logger.debug("EXIT!")
diff --git a/fba/http/federation.py b/fba/http/federation.py
index a7b2db4..a8291c7 100644
--- a/fba/http/federation.py
+++ b/fba/http/federation.py
@@ -645,7 +645,7 @@ def add_peers(rows: dict) -> list:
     for key in ["linked", "allowed", "blocked"]:
         logger.debug(f"Checking key='{key}'")
         if key not in rows or rows[key] is None:
-            logger.warning(f"Cannot find key='{key}' or it is NoneType - SKIPPED!")
+            logger.debug(f"Cannot find key='{key}' or it is NoneType - SKIPPED!")
             continue
 
         logger.debug(f"Adding {len(rows[key])} peer(s) to peers list ...")
-- 
2.39.5