From 19da10cefc258417653ae411ab527d13c4e53b51 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Wed, 22 Nov 2023 23:30:42 +0100 Subject: [PATCH] Continued: - you can now optionally allow I2P domains to be crawled (default: forbidden, i.e. clear-net only) --- config.defaults.json | 1 + fba/commands.py | 12 +++++++++--- fba/helpers/domain.py | 11 +++++++++-- fba/helpers/processing.py | 3 +++ 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/config.defaults.json b/config.defaults.json index 49c4742..8b72846 100644 --- a/config.defaults.json +++ b/config.defaults.json @@ -26,6 +26,7 @@ "rss_limit" : 50, "api_limit" : 500, "theme" : "light", + "allow_i2p_domain" : "false", "instances_social_api_key": "", "max_crawl_depth" : 2000, "min_peers_length" : 1000 diff --git a/fba/commands.py b/fba/commands.py index 4b41ac4..7fc9836 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -371,6 +371,9 @@ def fetch_blocks(args: argparse.Namespace) -> int: elif block["blocked"].endswith(".onion"): logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"]) continue + elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain"): + logger.debug("blocked='%s' is an I2P .onion domain - SKIPPED", block["blocked"]) + continue elif block["blocked"].endswith(".arpa"): logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"]) continue @@ -1315,15 +1318,18 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: if block["blocked"] == "": logger.debug("block[blocked] is empty - SKIPPED!") continue + elif block["blocked"].endswith(".onion"): + logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"]) + continue + elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain"): + logger.debug("blocked='%s' is an I2P onion domain name - SKIPPED!", block["blocked"]) + continue elif block["blocked"].endswith(".arpa"): logger.debug("blocked='%s' is a reversed IP address - 
SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".tld"): logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"]) continue - elif block["blocked"].endswith(".onion"): - logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"]) - continue elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0: logger.debug("block='%s' is obfuscated.", block["blocked"]) obfuscated = obfuscated + 1 diff --git a/fba/helpers/domain.py b/fba/helpers/domain.py index 84c9bcb..d006cb1 100644 --- a/fba/helpers/domain.py +++ b/fba/helpers/domain.py @@ -21,6 +21,7 @@ from urllib.parse import urlparse import validators from fba.helpers import blacklist +from fba.helpers import config from fba.models import instances @@ -38,10 +39,12 @@ def raise_on(domain: str): raise ValueError(f"Parameter domain='{domain}' must be all lower-case") elif not validators.domain(domain.split("/")[0]): raise ValueError(f"domain='{domain}' is not a valid domain") - elif domain.endswith(".arpa"): - raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!") elif domain.endswith(".onion"): raise ValueError(f"domain='{domain}' is a TOR, please don't crawl them!") + elif domain.endswith(".i2p") and config.get("allow_i2p_domain"): + raise ValueError(f"domain='{domain}' is an I2P, please don't crawl them!") + elif domain.endswith(".arpa"): + raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!") elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") @@ -76,6 +79,7 @@ def is_wanted(domain: str) -> bool: wanted = True if domain.lower() != domain: + logger.debug("domain='%s' is not all-lowercase - setting False ...", domain) wanted = False elif not validators.domain(domain.split("/")[0]): logger.debug("domain='%s' is not a valid domain name - setting False ...", domain) @@ -86,6 +90,9 @@ def 
is_wanted(domain: str) -> bool: elif domain.endswith(".onion"): logger.debug("domain='%s' is a TOR .onion domain - setting False ...", domain) wanted = False + elif domain.endswith(".i2p") and config.get("allow_i2p_domain"): + logger.debug("domain='%s' is an I2P domain - setting False ...", domain) + wanted = False elif domain.endswith(".tld"): logger.debug("domain='%s' is a fake domain - setting False ...", domain) wanted = False diff --git a/fba/helpers/processing.py b/fba/helpers/processing.py index 00f6ae0..dfb6388 100644 --- a/fba/helpers/processing.py +++ b/fba/helpers/processing.py @@ -199,6 +199,9 @@ def csv_block(blocker: str, url: str, command: str): elif domain.endswith(".onion"): logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain) continue + elif domain.endswith(".i2p") and config.get("allow_i2p_domain"): + logger.debug("domain='%s' is an I2P .onion domain - SKIPPED", domain) + continue elif domain.endswith(".arpa"): logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain) continue -- 2.39.5