From: Roland Häder Date: Mon, 12 Jun 2023 14:59:56 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=7a7c9bb9b9b0b09db7600ebd2f5d08cb6889e05d;p=fba.git Continued: - better checking with .endswith() instead of .split()[-1] - don't crawl '.tld' domains --- diff --git a/fba/csrf.py b/fba/csrf.py index 4602c28..6efcf23 100644 --- a/fba/csrf.py +++ b/fba/csrf.py @@ -26,6 +26,8 @@ def determine(domain: str, headers: dict) -> dict: raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") + elif domain.endswith(".tld"): + raise ValueException(f"domain='{domain}' is a fake domain, please don't crawl them!") elif not isinstance(headers, dict): raise ValueError(f"Parameter headers[]='{type(headers)}' is not 'dict'") diff --git a/fba/fba.py b/fba/fba.py index 8eb5cd5..f627ffa 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -147,7 +147,7 @@ def process_domain(domain: str, blocker: str, command: str) -> bool: if not validators.domain(domain): print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!") return False - elif domain.split(".")[-1] == "arpa": + elif domain.endswith(".arpa"): print(f"WARNING: domain='{domain}' is a reversed .arpa domain and should not be used generally.") return False elif blacklist.is_blacklisted(domain): diff --git a/fba/federation.py b/fba/federation.py index 045b0d2..371bf90 100644 --- a/fba/federation.py +++ b/fba/federation.py @@ -47,6 +47,8 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") + elif domain.endswith(".tld"): + raise ValueException(f"domain='{domain}' is a fake domain, please don't crawl them!") elif not isinstance(origin, str) and origin is not None: raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'") elif software is None: @@ -62,8 +64,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: raise ValueError(f"Parameter command[]='{type(command)}' is not 'str'") elif command == "": raise ValueError("Parameter 'command' is empty") - - if domain.split(".")[-1] == "arpa": + elif domain.endswith(".arpa"): print(f"WARNING: domain='{domain}' is a reversed .arpa domain and should not be used generally.") return elif not instances.is_registered(domain): @@ -97,7 +98,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: elif not validators.domain(instance.split("/")[0]): print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'") continue - elif instance.split(".")[-1] == "arpa": + elif instance.endswith(".arpa"): print(f"WARNING: instance='{instance}' is a reversed .arpa domain and should not be used generally.") continue elif blacklist.is_blacklisted(instance): @@ -105,7 +106,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: continue # DEBUG: print("DEBUG: Handling instance:", instance) - if instance.split(".")[-1] == "arpa": + if instance.endswith(".arpa"): print(f"WARNING: instance='{instance}' is a reversed .arpa domain and should not be used generally.") continue elif not instances.is_registered(instance): diff --git a/fba/instances.py b/fba/instances.py index 9884014..e8db229 100644 --- a/fba/instances.py +++ b/fba/instances.py @@ -190,8 +190,10 @@ def add(domain: str, origin: str, command: str, path: str = None): raise ValueError(f"command[]='{type(command)}' is not 'str'") elif command == "": raise ValueError("Parameter 'command' is empty") - elif not validators.domain(domain.split("/")[0]) or domain.split(".")[-1] == "arpa": + elif not validators.domain(domain.split("/")[0]): raise ValueError(f"Bad domain name='{domain}'") + elif domain.endswith(".arpa"): + raise ValueError(f"Please don't crawl .arpa domains: domain='{domain}'") elif origin is not None and not validators.domain(origin.split("/")[0]): raise ValueError(f"Bad origin name='{origin}'") elif blacklist.is_blacklisted(domain): diff --git a/fba/network.py b/fba/network.py index 93ea533..821c7ac 100644 --- a/fba/network.py +++ b/fba/network.py @@ -48,6 +48,8 @@ def post_json_api(domain: str, path: str, data: str, headers: dict = {}) -> dict raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") + elif domain.endswith(".tld"): + raise ValueException(f"domain='{domain}' is a fake domain, please don't crawl them!") elif not isinstance(path, str): raise ValueError(f"path[]='{type(path)}' is not 'str'") elif path == "": @@ -131,6 +133,8 @@ def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict: raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") + elif domain.endswith(".tld"): + raise ValueException(f"domain='{domain}' is a fake domain, please don't crawl them!") elif not isinstance(path, str): raise ValueError(f"path[]='{type(path)}' is not 'str'") elif path == "": @@ -179,6 +183,8 @@ def send_bot_post(domain: str, blocklist: dict): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") + elif domain.endswith(".tld"): + raise ValueException(f"domain='{domain}' is a fake domain, please don't crawl them!") elif not isinstance(blocklist, dict): raise ValueError(f"Parameter blocklist[]='{type(blocklist)}' is not 'dict'") @@ -224,6 +230,8 @@ def fetch_response(domain: str, path: str, headers: dict, timeout: tuple) -> req raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") + elif domain.endswith(".tld"): + raise ValueException(f"domain='{domain}' is a fake domain, please don't crawl them!") elif not isinstance(path, str): raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'") elif path == "": diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index 03c5bd5..04a935b 100644 --- a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -258,7 +258,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if not validators.domain(blocked): print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!") continue - elif blocked.split(".")[-1] == "arpa": + elif blocked.endswith(".arpa"): print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif not instances.is_registered(blocked): @@ -272,7 +272,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if not validators.domain(blocked): print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!") continue - elif blocked.split(".")[-1] == "arpa": + elif blocked.endswith(".arpa"): print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif not instances.is_registered(blocked): diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index a216634..9b70245 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -113,7 +113,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if not validators.domain(blocked): print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") continue - elif blocked.split(".")[-1] == "arpa": + elif blocked.endswith(".arpa"): print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif not instances.is_registered(blocked): @@ -197,7 +197,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if not validators.domain(blocked): print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") continue - elif blocked.split(".")[-1] == "arpa": + elif blocked.endswith(".arpa"): print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif not instances.is_registered(blocked):