From 2ab5b8a8ff7cc367595f9377347fcafc91c0a0cb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Wed, 22 Nov 2023 23:04:51 +0100 Subject: [PATCH] Continued: - improved/added some debug lines --- fba/http/federation.py | 20 +++++++++++--------- fba/networks/lemmy.py | 11 ++++++----- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/fba/http/federation.py b/fba/http/federation.py index b48255d..49eee69 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -110,7 +110,6 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: logger.debug("peerlist[]='%s'", type(peerlist)) if peerlist is None: logger.warning("Cannot fetch peers: domain='%s',software='%s'", domain, software) - if instances.has_pending(domain): logger.debug("Flushing updates for domain='%s' ...", domain) instances.update(domain) @@ -120,7 +119,6 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: return elif len(peerlist) == 0: logger.info("domain='%s' returned an empty peer list.", domain) - if instances.has_pending(domain): logger.debug("Flushing updates for domain='%s' ...", domain) instances.update(domain) @@ -131,7 +129,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: logger.info("Checking %d instance(s) from domain='%s',software='%s',depth=%d ...", len(peerlist), domain, software, _DEPTH) for instance in peerlist: - logger.debug("instance='%s'", instance) + logger.debug("instance[%s]='%s'", type(instance), instance) if instance is None or instance == "": logger.debug("instance[%s]='%s' is either None or empty - SKIPPED!", type(instance), instance) continue @@ -295,8 +293,8 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: if isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str) and platform.get("content") != "": logger.debug("Found property=og:platform, domain='%s'", domain) software = tidyup.domain(platform.get("content")) + logger.debug("software[%s]='%s' after tidyup.domain() ...", type(software), software) - logger.debug("software[%s]='%s'", type(software), software) if software is not None and software != "": logger.debug("domain='%s' has og:platform='%s' - Setting detection_mode=PLATFORM ...", domain, software) instances.set_detection_mode(domain, "PLATFORM") @@ -354,7 +352,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain) software = version.remove(software) - logger.debug("software[]='%s'", type(software)) + logger.debug("software[%s]='%s'", type(software), software) if isinstance(software, str) and "powered by " in software: logger.debug("software='%s' has 'powered by' in it", software) software = version.remove(software_helper.strip_powered_by(software)) @@ -368,7 +366,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: logger.debug("software='%s' has ' see ' in it", software) software = software_helper.strip_until(software, " see ") - logger.debug("software='%s' - EXIT!", software) + logger.debug("software[%s]='%s' - EXIT!", type(software), software) return software def determine_software(domain: str, path: str = None) -> str: @@ -455,7 +453,7 @@ def determine_software(domain: str, path: str = None) -> str: software = software.strip() - logger.debug("software='%s' - EXIT!", software) + logger.debug("software[%s]='%s' - EXIT!", type(software), software) return software def find_domains(tag: bs4.element.Tag) -> list: @@ -481,7 +479,7 @@ def find_domains(tag: bs4.element.Tag) -> list: logger.debug("domain='%s' is blacklisted - SKIPPED!", domain) continue elif domain == "gab.com/.ai, develop.gab.com": - logger.debug("Multiple domains detected in one row") + logger.debug("Multiple gab.com domains detected in one row") domains.append({ "domain": "gab.com", "reason": reason, @@ -515,7 +513,7 @@ def add_peers(rows: dict) -> list: peers = list() for key in ["linked", "allowed", "blocked"]: - logger.debug("Checking key='%s'", key) + logger.debug("key='%s'", key) if key not in rows or rows[key] is None: logger.debug("Cannot find key='%s' or it is NoneType - SKIPPED!", key) continue @@ -584,10 +582,14 @@ def fetch_blocks(domain: str) -> list: if "error_message" in data: logger.debug("Was not able to fetch domain_blocks from domain='%s': status_code=%d,error_message='%s'", domain, data['status_code'], data['error_message']) instances.set_last_error(domain, data) + + logger.debug("blocklist()=%d - EXIT!", len(blocklist)) return blocklist elif "json" in data and "error" in data["json"]: logger.warning("JSON API returned error message: '%s'", data["json"]["error"]) instances.set_last_error(domain, data) + + logger.debug("blocklist()=%d - EXIT!", len(blocklist)) return blocklist else: # Getting blocklist diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py index bfe1e80..52b2195 100644 --- a/fba/networks/lemmy.py +++ b/fba/networks/lemmy.py @@ -101,18 +101,20 @@ def fetch_peers(domain: str, origin: str) -> list: logger.debug("data[]='%s'", type(data)) if "error_message" in data: - logger.warning("Could not reach any JSON API: domain='%s'", domain) + logger.warning("Could not reach any JSON API: domain='%s',error_message='%s'", domain, data["error_message"]) instances.set_last_error(domain, data) elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict): logger.debug("Found federated_instances for domain='%s'", domain) peers = peers + federation.add_peers(data["json"]["federated_instances"]) + logger.debug("peers()=%d after adding", len(peers)) logger.debug("Marking domain='%s' as successfully handled ...", domain) instances.set_success(domain) if len(peers) == 0: - logger.warning("Fetching instances for domain='%s' from /instances ...", domain) + logger.debug("Fetching instances for domain='%s' from /instances ...", domain) peers = fetch_instances(domain, origin) + logger.debug("peers()=%d after fetch_instances(%s, %s)", len(peers), domain, origin) except network.exceptions as exception: logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception)) @@ -145,13 +147,12 @@ def fetch_blocks(domain: str) -> list: logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) if response.ok and response.status_code == 200 and response.text != "": logger.debug("Parsing %s Bytes ...", len(response.text)) - doc = bs4.BeautifulSoup(response.text, "html.parser") logger.debug("doc[]='%s'", type(doc)) found = None for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]: - logger.debug("criteria='%s'", criteria) + logger.debug("Trying to find criteria='%s' ...", criteria) containers = doc.findAll("div", criteria) logger.debug("Checking %d containers ...", len(containers)) @@ -274,8 +275,8 @@ def fetch_instances(domain: str, origin: str) -> list: for tag in rows: logger.debug("tag[]='%s'", type(tag)) text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text - logger.debug("text='%s' - BEFORE!", text) + logger.debug("text='%s' - BEFORE!", text) peer = tidyup.domain(text) if text != "" else None logger.debug("peer='%s' - AFTER", peer) -- 2.39.5