From 683a759783b9d0487a39fea503046830ea00e591 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Fri, 8 Sep 2023 18:50:17 +0200 Subject: [PATCH] Continued: - more checks on empty/None domain names --- fba/commands.py | 80 +++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 36 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index cb3200c..52401ff 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -534,10 +534,11 @@ def fetch_observer(args: argparse.Namespace) -> int: for item in items: logger.debug("item[]='%s'", type(item)) domain = item.decode_contents() + domain = tidyup.domain(domain) if domain != None and domain != "" else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("domain is empty - SKIPPED!") + if domain is None or domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain) continue logger.debug("domain='%s' - BEFORE!", domain) @@ -788,11 +789,12 @@ def fetch_fba_rss(args: argparse.Namespace) -> int: logger.debug("rss[]='%s'", type(rss)) for item in rss.items: logger.debug("item[%s]='%s'", type(item), item) - domain = tidyup.domain(item.link.split("=")[1]) + domain = item.link.split("=")[1] + domain = tidyup.domain(domain) if domain != None and domain != "" else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("domain is empty - SKIPPED!") + if domain is None or domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain) continue logger.debug("domain='%s' - BEFORE!", domain) @@ -872,11 +874,11 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int: logger.debug("element[]='%s'", type(element)) for href in element["href"].split(","): logger.debug("href[%s]='%s' - BEFORE!", type(href), href) - domain = tidyup.domain(href) + domain = tidyup.domain(href) if href != None and href != "" else None logger.debug("domain='%s' - AFTER!", domain) - if domain == 
"": - logger.debug("domain is empty - SKIPPED!") + if domain is None or domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain) continue logger.debug("domain='%s' - BEFORE!", domain) @@ -1068,8 +1070,8 @@ def fetch_oliphant(args: argparse.Namespace) -> int: cnt = cnt + 1 logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports) - if domain == "": - logger.debug("domain is empty - SKIPPED!") + if domain is None or domain == "": + logger.debug("domain='%s' is empty - SKIPPED!", domain) continue elif domain.endswith(".onion"): logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain) @@ -1164,7 +1166,7 @@ def fetch_txt(args: argparse.Namespace) -> int: logger.debug("domain='%s' - AFTER!", domain) if domain is None or domain == "": - logger.debug("domain='%s' is empty - SKIPPED!", domain) + logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain) continue elif not domain_helper.is_wanted(domain): logger.debug("domain='%s' is not wanted - SKIPPED!", domain) @@ -1216,11 +1218,11 @@ def fetch_fedipact(args: argparse.Namespace) -> int: logger.info("Checking %d row(s) ...", len(rows)) for row in rows: logger.debug("row[]='%s'", type(row)) - domain = tidyup.domain(row.contents[0]) + domain = tidyup.domain(row.contents[0]) if row.contents[0] != None and row.contents[0] != "" else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("domain is empty - SKIPPED!") + if domain is None or domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain) continue logger.debug("domain='%s' - BEFORE!", domain) @@ -1697,11 +1699,11 @@ def fetch_fedilist(args: argparse.Namespace) -> int: continue logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"]) - domain = tidyup.domain(row["hostname"]) + domain = tidyup.domain(row["hostname"]) if row["hostname"] != None and row["hostname"] != 
"" else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"]) + if domain is None or domain == "": + logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"]) continue logger.debug("domain='%s' - BEFORE!", domain) @@ -1834,11 +1836,11 @@ def fetch_instances_social(args: argparse.Namespace) -> int: logger.info("Checking %d row(s) ...", len(rows)) for row in rows: logger.debug("row[]='%s'", type(row)) - domain = tidyup.domain(row["name"]) + domain = tidyup.domain(row["name"]) if row["name"] != None and row["name"] != "" else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("domain is empty - SKIPPED!") + if domain is None or domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain) continue logger.debug("domain='%s' - BEFORE!", domain) @@ -1962,11 +1964,11 @@ def fetch_relays(args: argparse.Namespace) -> int: continue logger.debug("domain='%s' - BEFORE!", domain) - domain = tidyup.domain(domain) + domain = tidyup.domain(domain) if domain != None and domain != "" else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"]) + if domain is None or domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"]) continue elif domain not in peers: logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"]) @@ -1995,18 +1997,21 @@ def fetch_relays(args: argparse.Namespace) -> int: link = tag.find("a") logger.debug("link[%s]='%s'", type(link), link) if link is None: - logger.warning("tag='%s' has no a-tag ...", tag) + logger.warning("tag='%s' has no a-tag - SKIPPED!", tag) + continue + elif not link.has_attr("href"): + logger.warning("link()=%d 
has no key 'href' - SKIPPED!", len(link)) continue components = urlparse(link["href"]) domain = components.netloc.lower() logger.debug("domain='%s' - BEFORE!", domain) - domain = tidyup.domain(domain) + domain = tidyup.domain(domain) if domain != None and domain != "" else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"]) + if domain is None or domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"]) continue elif domain not in peers: logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"]) @@ -2015,9 +2020,6 @@ def fetch_relays(args: argparse.Namespace) -> int: if dict_helper.has_key(domains, "domain", domain): logger.debug("domain='%s' already added", domain) continue - elif not domain_helper.is_wanted(domain): - logger.debug("domain='%s' is not wanted - SKIPPED!", domain) - continue logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"]) domains.append({ @@ -2027,17 +2029,20 @@ def fetch_relays(args: argparse.Namespace) -> int: elif row["software"] == "pub-relay": logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"]) for domain in raw["json"]["metadata"]["peers"]: - logger.debug("domain='%s'", domain) - if domain not in peers: + logger.debug("domain='%s' - BEFORE!", domain) + domain = tidyup.domain(domain) if domain != None and domain != "" else None + logger.debug("domain='%s' - AFTER!", domain) + + if domain is None or domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"]) + continue + elif domain not in peers: logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"]) peers.append(domain) if dict_helper.has_key(domains, "domain", 
domain): logger.debug("domain='%s' already added", domain) continue - elif not domain_helper.is_wanted(domain): - logger.debug("domain='%s' is not wanted - SKIPPED!", domain) - continue logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"]) domains.append({ @@ -2060,7 +2065,10 @@ def fetch_relays(args: argparse.Namespace) -> int: logger.info("Checking %d domains ...", len(domains)) for row in domains: logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"]) - if instances.is_registered(row["domain"]): + if not domain_helper.is_wanted(row["domain"]): + logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"]) + continue + elif instances.is_registered(row["domain"]): logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"]) continue -- 2.39.5