From 028b7cc273d3eb727e4191f2aa1fa06c755a4170 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Wed, 21 Jun 2023 05:41:20 +0200 Subject: [PATCH] Continued: - also skip here to avoid exception --- fba/http/federation.py | 29 ++++++++++++++++++++++------- fba/networks/misskey.py | 13 +++++++++++-- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/fba/http/federation.py b/fba/http/federation.py index 492d932..c61bea5 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -357,12 +357,18 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: url = f"https://{domain}{url}" components = urlparse(url) - if blacklist.is_blacklisted(components.netloc): - print(f"WARNING: components.netloc='{components.netloc}' is blacklisted - SKIPPED!") - continue - elif not validators.domain(components.netloc): + if not validators.domain(components.netloc): print(f"WARNING: components.netloc='{components.netloc}' is not a valid domain - SKIPPED!") continue + elif domain.endswith(".arpa"): + print(f"WARNING: domain='{domain}' is a domain for reversed IP addresses - SKIPPED!") + continue + elif domain.endswith(".tld"): + print(f"WARNING: domain='{domain}' is a fake domain - SKIPPED!") + continue + elif blacklist.is_blacklisted(components.netloc): + # DEBUG: print(f"DEBUG: components.netloc='{components.netloc}' is blacklisted - SKIPPED!") + continue # DEBUG: print("DEBUG: Fetching nodeinfo from:", url) data = network.fetch_api_url( @@ -585,8 +591,17 @@ def find_domains(tag: bs4.element.Tag) -> list: # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'") - if blacklist.is_blacklisted(domain): - print(f"WARNING: domain='{domain}' is blacklisted - SKIPPED!") + if not validators.domain(domain.split("/")[0]): + print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!") + continue + elif domain.endswith(".arpa"): + print(f"WARNING: domain='{domain}' is a domain for reversed IP addresses - SKIPPED!") + continue + elif domain.endswith(".tld"): + print(f"WARNING: domain='{domain}' is a fake domain - SKIPPED!") + continue + elif blacklist.is_blacklisted(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") continue elif domain == "gab.com/.ai, develop.gab.com": # DEBUG: print("DEBUG: Multiple domains detected in one row") @@ -638,7 +653,7 @@ def add_peers(rows: dict) -> list: print(f"WARNING: peer='{peer}' is a fake domain - SKIPPED!") continue elif blacklist.is_blacklisted(peer): - print(f"WARNING: peer='{peer}' is blacklisted - SKIPPED!") + # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted - SKIPPED!") continue # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...") diff --git a/fba/networks/misskey.py b/fba/networks/misskey.py index 2cede8b..f39c649 100644 --- a/fba/networks/misskey.py +++ b/fba/networks/misskey.py @@ -110,10 +110,19 @@ def fetch_peers(domain: str) -> list: print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'") continue elif not isinstance(row["host"], str): - print(f"WARNING: row[host][]='{type(row['host'])}' is not 'str'") + print(f"WARNING: row[host][]='{type(row['host'])}' is not 'str' - SKIPPED!") + continue + elif not validators.domain(row["host"].split("/")[0]): + print(f"WARNING: row[host]='{row['host']}' is not a valid domain - SKIPPED!") + continue + elif row["host"].endswith(".arpa"): + print(f"WARNING: row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!") + continue + elif row["host"].endswith(".tld"): + print(f"WARNING: row[host]='{row['host']}' is a fake domain - SKIPPED!") continue elif blacklist.is_blacklisted(row["host"]): - # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}'") + # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!") continue elif row["host"] in peers: # DEBUG: print(f"DEBUG: Not adding row[host]='{row['host']}', already found.") -- 2.39.5