From dc1544a3eefea096037fbf041f53fcc1814a7b74 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sat, 13 Jul 2024 16:18:34 +0200 Subject: [PATCH] Continued: - skip invalid/unwanted domains (like .ip2/tld/onion ...) - proper name logged --- fba/commands.py | 30 +++++++++++++++--------------- fba/helpers/processing.py | 8 ++++---- fba/http/federation.py | 10 +++++----- fba/models/instances.py | 18 ++++++++++++++++++ fba/networks/peertube.py | 4 ++-- 5 files changed, 44 insertions(+), 26 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index 1e0ccb3..646a60e 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -374,16 +374,16 @@ def fetch_blocks(args: argparse.Namespace) -> int: logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker) continue elif block["blocked"].endswith(".onion"): - logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"]) + logger.debug("block[blocked]='%s' is a TOR .onion domain - SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".i2p") and not config.get("allow_i2p_domain"): - logger.debug("blocked='%s' is an I2P .onion domain - SKIPPED", block["blocked"]) + logger.debug("block[blocked]='%s' is an I2P .onion domain - SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".arpa"): - logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"]) + logger.debug("block[blocked]='%s' is a reverse IP address - SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".tld"): - logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"]) + logger.debug("block[blocked]='%s' is a fake domain - SKIPPED!", block["blocked"]) continue elif block["blocked"].find("*") >= 0: logger.debug("blocker='%s' uses obfuscated domains", blocker) @@ -395,7 +395,7 @@ def fetch_blocks(args: argparse.Namespace) -> int: logger.debug("row[]='%s'", type(row)) if row is None: - logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software) + logger.warning("Cannot deobfuscate block[blocked]='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software) continue deobfuscated = deobfuscated + 1 @@ -412,7 +412,7 @@ def fetch_blocks(args: argparse.Namespace) -> int: logger.debug("row[]='%s'", type(row)) if row is None: - logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software) + logger.warning("Cannot deobfuscate block[blocked]='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software) continue deobfuscated = deobfuscated + 1 @@ -420,7 +420,7 @@ def fetch_blocks(args: argparse.Namespace) -> int: origin = row["origin"] nodeinfo_url = row["nodeinfo_url"] - logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"]) + logger.debug("Looking up instance by domain, block[blocked]='%s'", block["blocked"]) if block["blocked"] in [None, ""]: logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"]) continue @@ -428,16 +428,16 @@ def fetch_blocks(args: argparse.Namespace) -> int: logger.warning("block[blocked]='%s' is not a valid domain - SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".onion"): - logger.debug("block[blocked]'%s' is a TOR .onion domain - SKIPPED", block["blocked"]) + logger.debug("block[blocked]'%s' is a TOR .onion domain - SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".i2p") and not config.get("allow_i2p_domain"): - logger.debug("block[blocked]'%s' is an I2P .onion domain - SKIPPED", block["blocked"]) + logger.debug("block[blocked]'%s' is an I2P .onion domain - SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".arpa"): - logger.debug("block[blocked]'%s' is a reverse IP address - SKIPPED", block["blocked"]) + logger.debug("block[blocked]'%s' is a reverse IP address - SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".tld"): - logger.debug("block[blocked]'%s' is a fake domain - SKIPPED", block["blocked"]) + logger.debug("block[blocked]'%s' is a fake domain - SKIPPED!", block["blocked"]) continue logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"]) @@ -1589,16 +1589,16 @@ def update_nodeinfo(args: argparse.Namespace) -> int: for row in domains: logger.debug("row[]='%s'", type(row)) if row["domain"].endswith(".i2p") and not config.get("allow_i2p_domain"): - logger.debug("row[domain]='%s' is an I2P address - SKIPPED", row["domain"]) + logger.debug("row[domain]='%s' is an I2P address - SKIPPED!", row["domain"]) continue elif row["domain"].endswith(".onion"): - logger.debug("row[domain]='%s' is a TOR .onion domain - SKIPPED", row["domain"]) + logger.debug("row[domain]='%s' is a TOR .onion domain - SKIPPED!", row["domain"]) continue elif row["domain"].endswith(".arpa"): - logger.debug("row[domain]='%s' is a reverse IP address - SKIPPED", row["domain"]) + logger.debug("row[domain]='%s' is a reverse IP address - SKIPPED!", row["domain"]) continue elif row["domain"].endswith(".tld"): - logger.debug("row[domain]='%s' is a fake domain - SKIPPED", row["domain"]) + logger.debug("row[domain]='%s' is a fake domain - SKIPPED!", row["domain"]) continue elif blacklist.is_blacklisted(row["domain"]): logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"]) diff --git a/fba/helpers/processing.py b/fba/helpers/processing.py index b71aa92..8fd5d81 100644 --- a/fba/helpers/processing.py +++ b/fba/helpers/processing.py @@ -207,16 +207,16 @@ def csv_block(blocker: str, url: str, command: str): logger.debug("domain='%s' is empty - SKIPPED!", domain) continue elif domain.endswith(".onion"): - logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain) + logger.debug("domain='%s' is a TOR .onion domain - SKIPPED!", domain) continue elif domain.endswith(".i2p") and not config.get("allow_i2p_domain"): - logger.debug("domain='%s' is an I2P .onion domain - SKIPPED", domain) + logger.debug("domain='%s' is an I2P .onion domain - SKIPPED!", domain) continue elif domain.endswith(".arpa"): - logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain) + logger.debug("domain='%s' is a reverse IP address - SKIPPED!", domain) continue elif domain.endswith(".tld"): - logger.debug("domain='%s' is a fake domain - SKIPPED", domain) + logger.debug("domain='%s' is a fake domain - SKIPPED!", domain) continue elif domain.find("*") >= 0 or domain.find("?") >= 0: logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, blocker) diff --git a/fba/http/federation.py b/fba/http/federation.py index 994a32b..d62aca4 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -176,16 +176,16 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: logger.warning("instance='%s' is not a valid domain - SKIPPED!", instance) continue elif instance.endswith(".onion"): - logger.debug("instance='%s' is a TOR .onion instance - SKIPPED", instance) + logger.debug("instance='%s' is a TOR .onion instance - SKIPPED!", instance) continue elif instance.endswith(".i2p") and not config.get("allow_i2p_domain"): - logger.debug("instance='%s' is an I2P .onion instance - SKIPPED", instance) + logger.debug("instance='%s' is an I2P .onion instance - SKIPPED!", instance) continue elif instance.endswith(".arpa"): - logger.debug("instance='%s' is a reverse IP address - SKIPPED", instance) + logger.debug("instance='%s' is a reverse IP address - SKIPPED!", instance) continue elif instance.endswith(".tld"): - logger.debug("instance='%s' is a fake instance - SKIPPED", instance) + logger.debug("instance='%s' is a fake instance - SKIPPED!", instance) continue logger.debug("instance='%s' - BEFORE!", instance) @@ -588,7 +588,7 @@ def add_peers(rows: dict) -> list: for peer in rows[key]: logger.debug("peer[%s]='%s' - BEFORE!", type(peer), peer) if peer in [None, ""]: - logger.debug("peer is empty - SKIPPED") + logger.debug("peer is empty - SKIPPED!") continue elif isinstance(peer, dict) and "domain" in peer: logger.debug("peer[domain]='%s'", peer["domain"]) diff --git a/fba/models/instances.py b/fba/models/instances.py index b25cd3e..788f403 100644 --- a/fba/models/instances.py +++ b/fba/models/instances.py @@ -600,6 +600,24 @@ def translate_idnas(rows: list, column: str): logger.info("Checking/converting %d domain names ...", len(rows)) for row in rows: logger.debug("row[]='%s'", type(row)) + if row[column] in [None, ""]: + logger.warning("row[%s]='%s' is empty - SKIPPED!", column, row[column]) + continue + elif not validators.domain(row[column].split("/")[0], rfc_2782=True): + logger.warning("row[%s]='%s' is not valid domain - SKIPPED!", column, row[column]) + continue + elif row[column].endswith(".onion"): + logger.debug("row[%s]='%s' is a TOR .onion domain - SKIPPED!", column, row[column]) + continue + elif row[column].endswith(".i2p") and not config.get("allow_i2p_domain"): + logger.debug("row[%s]='%s' is an I2P .onion domain - SKIPPED!", column, row[column]) + continue + elif row[column].endswith(".arpa"): + logger.debug("row[%s]='%s' is a reverse IP address - SKIPPED!", column, row[column]) + continue + elif row[column].endswith(".tld"): + logger.debug("row[%s]='%s' is a fake domain - SKIPPED!", column, row[column]) + continue punycode = domain_helper.encode_idna(row[column]) logger.debug("punycode='%s',row[%s]='%s'", punycode, column, row[column]) diff --git a/fba/networks/peertube.py b/fba/networks/peertube.py index 733a5b6..6eddfec 100644 --- a/fba/networks/peertube.py +++ b/fba/networks/peertube.py @@ -81,10 +81,10 @@ def fetch_peers(domain: str) -> list: for mode2 in ["follower", "following"]: logger.debug("mode=%s,mode2='%s'", mode, mode2) if mode2 not in record: - logger.debug("Array record does not contain element mode2='%s' - SKIPPED", mode2) + logger.debug("Array record does not contain element mode2='%s' - SKIPPED!", mode2) continue elif "host" not in record[mode2]: - logger.debug("record[%s] does not contain element 'host' - SKIPPED", mode2) + logger.debug("record[%s] does not contain element 'host' - SKIPPED!", mode2) continue elif record[mode2]["host"] == domain: logger.debug("record[%s]='%s' matches domain='%s' - SKIPPED!", mode2, record[mode2]["host"], domain) -- 2.39.5