From: Roland Häder Date: Wed, 10 Jul 2024 18:54:25 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=fb238f8c0d848124b7ef3943e5d474ede7becf2e;p=fba.git Continued: - allowed underscore in domains (RFC 2782) - have to check domain validity before domain_helper.encode_idna() is invoked --- diff --git a/fba/commands.py b/fba/commands.py index c64c713..30673d1 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -65,7 +65,7 @@ def check_instance(args: argparse.Namespace) -> int: logger.debug("args.domain='%s' - CALLED!", args.domain) status = 0 - if not validators.domain(args.domain): + if not validators.domain(args.domain, rfc_2782=True): logger.warning("args.domain='%s' is not valid", args.domain) status = 100 elif blacklist.is_blacklisted(args.domain): @@ -270,7 +270,7 @@ def fetch_blocks(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) if args.domain is not None and args.domain != "": logger.debug("args.domain='%s' - checking ...", args.domain) - if not validators.domain(args.domain): + if not validators.domain(args.domain, rfc_2782=True): logger.warning("args.domain='%s' is not valid.", args.domain) return 100 elif blacklist.is_blacklisted(args.domain): @@ -970,7 +970,7 @@ def fetch_instances(args: argparse.Namespace) -> int: # Is domain or software set? 
if args.domain not in [None, ""]: logger.debug("args.domain='%s' - checking ...", args.domain) - if not validators.domain(args.domain): + if not validators.domain(args.domain, rfc_2782=True): logger.warning("args.domain='%s' is not valid.", args.domain) return 100 elif blacklist.is_blacklisted(args.domain): @@ -1322,7 +1322,7 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain): logger.debug("Fetching record for args.domain='%s' ...", args.domain) database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain]) - elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software: + elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software, rfc_2782=True) == args.software: logger.debug("Fetching records for args.software='%s' ...", args.software) database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software]) else: @@ -2034,7 +2034,7 @@ def remove_invalid(args: argparse.Namespace) -> int: logger.info("Checking %d domains ...", len(rows)) for row in rows: logger.debug("row[domain]='%s'", row["domain"]) - if not validators.domain(row["domain"].split("/")[0]): + if not validators.domain(row["domain"].split("/")[0], rfc_2782=True): logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"]) database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]]) database.cursor.execute("DELETE FROM instances WHERE domain = ? 
LIMIT 1", [row["domain"]]) diff --git a/fba/helpers/domain.py b/fba/helpers/domain.py index b3b6f51..f056047 100644 --- a/fba/helpers/domain.py +++ b/fba/helpers/domain.py @@ -41,7 +41,7 @@ def raise_on(domain: str): raise ValueError(f"Parameter domain='{domain}' must be all lower-case") elif "?" in domain: raise ValueError(f"Parameter domain='{domain}' contains a question-mark") - elif not validators.domain(domain.split("/")[0]): + elif not validators.domain(domain.split("/")[0], rfc_2782=True): raise ValueError(f"domain='{domain}' is not a valid domain") elif domain.endswith(".onion"): raise ValueError(f"domain='{domain}' is a TOR, please don't crawl them!") @@ -92,7 +92,7 @@ def is_wanted(domain: str) -> bool: if domain.lower() != domain: logger.debug("domain='%s' is not all-lowercase - setting False ...", domain) wanted = False - elif not validators.domain(domain.split("/")[0]): + elif not validators.domain(domain.split("/")[0], rfc_2782=True): logger.debug("domain='%s' is not a valid domain name - setting False ...", domain) wanted = False elif domain.endswith(".arpa"): diff --git a/fba/helpers/processing.py b/fba/helpers/processing.py index 6c62d90..01c66d4 100644 --- a/fba/helpers/processing.py +++ b/fba/helpers/processing.py @@ -226,7 +226,7 @@ def csv_block(blocker: str, url: str, command: str): logger.debug("Marking domain='%s' as handled", domain) domains.append(domain) - if not validators.domain(domain): + if not validators.domain(domain, rfc_2782=True): logger.debug("domain='%s' is not a valid domain - SKIPPED!") continue elif blacklist.is_blacklisted(domain): diff --git a/fba/http/federation.py b/fba/http/federation.py index 8b72de2..8b22ccf 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -171,6 +171,11 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: logger.warning("instance='%s' contains double-dot, removing ...", instance) instance = instance.replace("..", ".") + logger.debug("instance='%s'", 
instance) + if not validators.domain(instance, rfc_2782=True): + logger.warning("instance='%s' is not a valid domain - SKIPPED!", instance) + continue + logger.debug("instance='%s' - BEFORE!", instance) instance = domain_helper.encode_idna(instance) logger.debug("instance='%s' - AFTER!", instance) @@ -537,7 +542,7 @@ def find_domains(tag: bs4.element.Tag) -> list: "reason": reason, }) continue - elif not validators.domain(domain.split("/")[0]): + elif not validators.domain(domain.split("/")[0], rfc_2782=True): logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain) continue diff --git a/fba/models/instances.py b/fba/models/instances.py index 9e5cc8d..b25cd3e 100644 --- a/fba/models/instances.py +++ b/fba/models/instances.py @@ -194,7 +194,7 @@ def add(domain: str, origin: str, command: str, path: str = None, software: str raise ValueError(f"software[]='{type(software)}' is not of type 'str'") elif software == "": raise ValueError("Parameter 'software' is empty") - elif origin is not None and not validators.domain(origin.split("/")[0]): + elif origin is not None and not validators.domain(origin.split("/")[0], rfc_2782=True): raise ValueError(f"Bad origin name='{origin}'") elif blacklist.is_blacklisted(domain): raise Exception(f"domain='{domain}' is blacklisted, but function invoked") diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index 08a1b20..07d2a2e 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -128,7 +128,7 @@ def fetch_blocks(domain: str) -> list: if blocked in [None, ""]: logger.warning("blocked[%s]='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", type(blocked), blocked, domain, block_level) continue - elif validators.domain(blocked) and blacklist.is_blacklisted(blocked): + elif validators.domain(blocked, rfc_2782=True) and blacklist.is_blacklisted(blocked): logger.debug("blocked='%s' is blacklisted - SKIPPED!") continue diff --git a/fba/utils.py b/fba/utils.py index 
747da2b..d56110d 100644 --- a/fba/utils.py +++ b/fba/utils.py @@ -130,7 +130,7 @@ def deobfuscate(domain: str, blocker: str, domain_hash: str = None) -> str: logger.debug("domain='%s',blocker='%s',domain_hash='%s' - CALLED!", domain, blocker, domain_hash) domain_helper.raise_on(blocker) - if validators.domain(domain) and blacklist.is_blacklisted(domain): + if validators.domain(domain, rfc_2782=True) and blacklist.is_blacklisted(domain): raise ValueError(f"domain='{domain}' is blacklisted but function was invoked") elif not isinstance(domain_hash, str) and domain_hash is not None: raise ValueError(f"Parameter domain_hash[]='{type(domain_hash)}' is not of type 'str'")