From 9a649fe40c8cf745404415136b03ceb58b57b2ab Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sat, 16 Dec 2023 09:51:12 +0100 Subject: [PATCH] Continued: - added missing 'continue' - added some debug messages - skip empty/NoneType domain names --- fba/commands.py | 2 +- fba/networks/lemmy.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index 2d26357..5ddb44a 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -1611,7 +1611,7 @@ def fetch_instances_social(args: argparse.Namespace) -> int: logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) continue - logger.info("Fetching instances from domain='%s'", domain) + logger.info("Fetching instances from domain='%s' ...", domain) federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) logger.debug("Success! - EXIT!") diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py index d216cf8..7d058d5 100644 --- a/fba/networks/lemmy.py +++ b/fba/networks/lemmy.py @@ -35,7 +35,7 @@ logger = logging.getLogger(__name__) #logger.setLevel(logging.DEBUG) # Lemmy translations -translations = [ +_translations = [ "Blocked Instances".lower(), "Instàncies bloquejades".lower(), "Blocáilte Ásc".lower(), @@ -113,6 +113,7 @@ def fetch_peers(domain: str, origin: str) -> list: logger.debug("Marking domain='%s' as successfully handled ...", domain) instances.set_success(domain) + logger.debug("peers()=%d", len(peers)) if len(peers) == 0: logger.debug("Fetching instances for domain='%s' from /instances ...", domain) peers = fetch_instances(domain, origin) @@ -173,7 +174,7 @@ def fetch_blocks(domain: str) -> list: elif not isinstance(content, str): logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content)) continue - elif content.lower() in translations: + elif content.lower() in _translations: logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header) found = header break @@ -278,7 +279,7 @@ def fetch_instances(domain: str, origin: str) -> list: logger.debug("tag[]='%s'", type(tag)) text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text - logger.debug("text='%s' - BEFORE!", text) + logger.debug("text[%s]='%s' - BEFORE!", type(text), text) peer = tidyup.domain(text) if text != "" else None logger.debug("peer='%s' - AFTER", peer) @@ -299,6 +300,7 @@ def fetch_instances(domain: str, origin: str) -> list: if len(peers) == 0: logger.debug("Found no peers for domain='%s', trying script tag ...", domain) peers = parse_script(doc) + logger.debug("Parsing doc()=%d returned %d peer(s).", len(doc), len(peers)) else: logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason) instances.set_last_error(domain, response) @@ -366,7 +368,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list: data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"] logger.debug("Checking %d data elements ...", len(data)) for element in data: - logger.debug("element='%s'", element) + logger.debug("element[%s]='%s'", type(element), element) if isinstance(only, str) and only != element: logger.debug("Skipping unwanted element='%s',only='%s'", element, only) continue @@ -377,6 +379,9 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list: if "domain" not in row: logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row)) continue + elif row["domain"] in [None, ""]: + logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"]) + continue logger.debug("row[domain]='%s' - BEFORE!", row["domain"]) peer = tidyup.domain(row["domain"]) @@ -387,6 +392,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list: continue elif not domain_helper.is_wanted(peer): logger.debug("peer='%s' is not wanted - SKIPPED!", peer) + continue elif peer in peers: logger.debug("peer='%s' already added - SKIPPED!", peer) continue -- 2.39.5