From dfa5901bb6dfd97ff000b0808161e4cd1cd4a35c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 1 Jul 2024 23:58:25 +0200 Subject: [PATCH] Continued: - stop question marks in domain.raise_on() function - split off question mark - added space for better readability - fixed syntax error --- fba/commands.py | 2 +- fba/helpers/domain.py | 2 ++ fba/http/federation.py | 5 +++-- fba/models/instances.py | 1 - 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index 05cdd7e..a514c79 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -1009,7 +1009,7 @@ def fetch_instances(args: argparse.Namespace) -> int: elif row["software"] is not None and software_helper.is_relay(row["software"]) and instances.is_registered(row["domain"]): logger.warning("row[domain]='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"]) continue - elif not args.force and not args.software in [None, ""]and instances.is_recent(row["domain"]): + elif not args.force and not args.software in [None, ""] and instances.is_recent(row["domain"]): logger.debug("row[domain]='%s' has recently been crawled - SKIPPED!", row["domain"]) continue diff --git a/fba/helpers/domain.py b/fba/helpers/domain.py index df4c703..fcf672a 100644 --- a/fba/helpers/domain.py +++ b/fba/helpers/domain.py @@ -38,6 +38,8 @@ def raise_on(domain: str): raise ValueError("Parameter 'domain' is empty") elif domain.lower() != domain: raise ValueError(f"Parameter domain='{domain}' must be all lower-case") + elif "?" in domain: + raise ValueError(f"Parameter domain='{domain}' contains a question-mark") elif not validators.domain(domain.split("/")[0]): raise ValueError(f"domain='{domain}' is not a valid domain") elif domain.endswith(".onion"): diff --git a/fba/http/federation.py b/fba/http/federation.py index eae8f8e..6fea3ea 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -162,6 +162,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: logger.debug("instance='%s' - BEFORE!", instance) instance = instance.encode("idna").decode("utf-8") + instance = instance.split("?")[0] logger.debug("instance='%s' - AFTER!", instance) if not domain_helper.is_wanted(instance): @@ -295,8 +296,8 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: ) logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) - response_url = response.url.split("#")[0], response_url) - logger.debug("response_url='%s'" + response_url = response.url.split("#")[0] + logger.debug("response_url='%s'", response_url) if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url): logger.debug("Parsing response.text()=%d Bytes ...", len(response.text)) diff --git a/fba/models/instances.py b/fba/models/instances.py index 325be37..f75bd6b 100644 --- a/fba/models/instances.py +++ b/fba/models/instances.py @@ -538,7 +538,6 @@ def set_original_software(domain: str, software: str): _set_data("original_software", domain, software) logger.debug("EXIT!") - def set_software(domain: str, software: str): logger.debug("domain='%s',software='%s' - CALLED!", domain, software) domain_helper.raise_on(domain) -- 2.39.5