From d1aa7f3dfb18f62cea7881df31bfdd6029b89af6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Thu, 23 Nov 2023 02:27:30 +0100 Subject: [PATCH] Continued: - check config key against "true" - improved logger messages - added some --- fba/commands.py | 4 +-- fba/helpers/cache.py | 9 ++++-- fba/helpers/domain.py | 4 +-- fba/helpers/locking.py | 1 + fba/helpers/processing.py | 2 +- fba/helpers/software.py | 1 + fba/http/csrf.py | 1 + fba/http/federation.py | 15 +++++---- fba/http/nodeinfo.py | 67 +++++++++++++++++++++------------------ fba/models/blocks.py | 2 ++ 10 files changed, 61 insertions(+), 45 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index 7fc9836..7886d37 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -371,7 +371,7 @@ def fetch_blocks(args: argparse.Namespace) -> int: elif block["blocked"].endswith(".onion"): logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"]) continue - elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain"): + elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") == "true": logger.debug("blocked='%s' is an I2P .onion domain - SKIPPED", block["blocked"]) continue elif block["blocked"].endswith(".arpa"): @@ -1321,7 +1321,7 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: elif block["blocked"].endswith(".onion"): logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"]) continue - elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain"): + elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") == "true": logger.debug("blocked='%s' is an I2P onion domain name - SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".arpa"): diff --git a/fba/helpers/cache.py b/fba/helpers/cache.py index f15d56f..325e583 100644 --- a/fba/helpers/cache.py +++ b/fba/helpers/cache.py @@ -26,7 +26,11 @@ _cache = {} ##### Cache ##### def key_exists(key: str) -> bool: - 
return key in _cache + logger.debug("key='%s' - CALLED!", key) + exists = key in _cache + + logger.debug("exists='%s' - EXIT!", exists) + return exists def set_all(key: str, rows: list, value: any): logger.debug("key='%s',rows()=%d,value[]='%s' - CALLED!", key, len(rows), type(value)) @@ -37,6 +41,7 @@ def set_all(key: str, rows: list, value: any): logger.debug("Cache for key='%s' not initialized.", key) _cache[key] = dict() + logger.debug("Setting %d row(s) for key='%s',value[%s]='%s' ...", len(rows), key, type(value), value) for sub in rows: logger.debug("Setting key='%s',sub[%s]='%s'", key, type(sub), sub) if isinstance(sub, sqlite3.Row): @@ -57,7 +62,7 @@ def set_sub_key(key: str, sub: str, value: any): elif not key_exists(key): raise Exception(f"Cache for key='{key}' is not initialized, but function invoked") - logger.debug("Setting key='%s',sub='%s',value[]='%s' ...", key, sub, type(value)) + logger.debug("Setting key='%s',sub='%s',value[%s]='%s' ...", key, sub, type(value), value) _cache[key][sub] = value logger.debug("EXIT!") diff --git a/fba/helpers/domain.py b/fba/helpers/domain.py index da39c80..00bf938 100644 --- a/fba/helpers/domain.py +++ b/fba/helpers/domain.py @@ -41,7 +41,7 @@ def raise_on(domain: str): raise ValueError(f"domain='{domain}' is not a valid domain") elif domain.endswith(".onion"): raise ValueError(f"domain='{domain}' is a TOR, please don't crawl them!") - elif domain.endswith(".i2p") and config.get("allow_i2p_domain"): + elif domain.endswith(".i2p") and config.get("allow_i2p_domain") == "true": raise ValueError(f"domain='{domain}' is an I2P, please don't crawl them!") elif domain.endswith(".arpa"): raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!") @@ -90,7 +90,7 @@ def is_wanted(domain: str) -> bool: elif domain.endswith(".onion"): logger.debug("domain='%s' is a TOR .onion domain - setting False ...", domain) wanted = False - elif domain.endswith(".i2p") and 
config.get("allow_i2p_domain"): + elif domain.endswith(".i2p") and config.get("allow_i2p_domain") == "true": logger.debug("domain='%s' is an I2P domain - setting False ...", domain) wanted = False elif domain.endswith(".tld"): diff --git a/fba/helpers/locking.py b/fba/helpers/locking.py index 0fc18b9..f553d04 100644 --- a/fba/helpers/locking.py +++ b/fba/helpers/locking.py @@ -47,6 +47,7 @@ def release(): if LOCK is not None: logger.debug("Releasing lock ...") LOCK.close() + logger.debug("Deleting lockfile='%s' ...", lockfile) os.remove(lockfile) diff --git a/fba/helpers/processing.py b/fba/helpers/processing.py index dfb6388..defabdd 100644 --- a/fba/helpers/processing.py +++ b/fba/helpers/processing.py @@ -199,7 +199,7 @@ def csv_block(blocker: str, url: str, command: str): elif domain.endswith(".onion"): logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain) continue - elif domain.endswith(".i2p") and config.get("allow_i2p_domain"): + elif domain.endswith(".i2p") and config.get("allow_i2p_domain") == "true": logger.debug("domain='%s' is an I2P .onion domain - SKIPPED", domain) continue elif domain.endswith(".arpa"): diff --git a/fba/helpers/software.py b/fba/helpers/software.py index be0bbb2..f224ba4 100644 --- a/fba/helpers/software.py +++ b/fba/helpers/software.py @@ -208,5 +208,6 @@ def is_relay(software: str) -> bool: raise ValueError(f"software[]='{type(software)}' is not type 'str'") found = software in relays + logger.debug("found='%s' - EXIT!", found) return found diff --git a/fba/http/csrf.py b/fba/http/csrf.py index f629cb8..9f349e7 100644 --- a/fba/http/csrf.py +++ b/fba/http/csrf.py @@ -70,6 +70,7 @@ def determine(domain: str, headers: dict) -> dict: reqheaders["X-CSRF-Token"] = tag["content"] elif not domain_helper.is_in_url(domain, response.url): logger.warning("domain='%s' doesn't match with response.url='%s', maybe redirect to other domain?", domain, response.url) + message = f"Redirect from domain='{domain}' to 
response.url='{response.url}'" instances.set_last_error(domain, message) raise requests.exceptions.TooManyRedirects(message) diff --git a/fba/http/federation.py b/fba/http/federation.py index 49eee69..fdb1828 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -42,6 +42,12 @@ from fba.networks import peertube # Depth counter, being raised and lowered _DEPTH = 0 +# API paths +_api_paths = [ + "/api/v1/instance/peers", + "/api/v3/site", +] + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -218,16 +224,11 @@ def fetch_peers(domain: str, software: str, origin: str) -> list: logger.debug("Returning empty list ... - EXIT!") return list() - paths = [ - "/api/v1/instance/peers", - "/api/v3/site", - ] - # Init peers variable peers = list() - logger.debug("Checking %d paths ...", len(paths)) - for path in paths: + logger.debug("Checking %d API paths ...", len(_api_paths)) + for path in _api_paths: logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software) data = network.get_json_api( domain, diff --git a/fba/http/nodeinfo.py b/fba/http/nodeinfo.py index 5810c56..b31e2ac 100644 --- a/fba/http/nodeinfo.py +++ b/fba/http/nodeinfo.py @@ -28,6 +28,29 @@ from fba.models import instances logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +# Request paths +_request_paths = [ + "/nodeinfo/2.1.json", + "/nodeinfo/2.1", + "/nodeinfo/2.0.json", + "/nodeinfo/2.0", + "/nodeinfo/1.0.json", + "/nodeinfo/1.0", + "/api/v1/instance", +] + +# "rel" identifiers (no real URLs) +_nodeinfo_identifier = [ + "https://nodeinfo.diaspora.software/ns/schema/2.1", + "http://nodeinfo.diaspora.software/ns/schema/2.1", + "https://nodeinfo.diaspora.software/ns/schema/2.0", + "http://nodeinfo.diaspora.software/ns/schema/2.0", + "https://nodeinfo.diaspora.software/ns/schema/1.1", + "http://nodeinfo.diaspora.software/ns/schema/1.1", + "https://nodeinfo.diaspora.software/ns/schema/1.0", + 
"http://nodeinfo.diaspora.software/ns/schema/1.0", +] + def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict: logger.debug("domain='%s',path='%s',update_mode='%s' - CALLED!", domain, path, update_mode) domain_helper.raise_on(domain) @@ -59,6 +82,7 @@ def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict: try: logger.debug("Checking CSRF for domain='%s'", domain) headers = csrf.determine(domain, dict()) + logger.debug("headers()=%d", len(headers)) except network.exceptions as exception: logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__) instances.set_last_error(domain, exception) @@ -68,17 +92,8 @@ def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict: "exception" : exception, } - request_paths = [ - "/nodeinfo/2.1.json", - "/nodeinfo/2.1", - "/nodeinfo/2.0.json", - "/nodeinfo/2.0", - "/nodeinfo/1.0.json", - "/nodeinfo/1.0", - "/api/v1/instance", - ] - - for request in request_paths: + logger.debug("Checking %d request paths ...", len(_request_paths)) + for request in _request_paths: logger.debug("request='%s'", request) http_url = f"http://{domain}{str(path) if path is not None else '/'}" https_url = f"https://{domain}{str(path) if path is not None else '/'}" @@ -105,6 +120,8 @@ def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict: logger.debug("domain='%s',request='%s'", domain, request) instances.set_nodeinfo_url(domain, "https://{domain}{request}") + + logger.debug("BREAK!") break logger.warning("Failed fetching nodeinfo from domain='%s',status_code='%s',error_message='%s'", domain, data['status_code'], data['error_message']) @@ -116,18 +133,6 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: logger.debug("domain='%s' - CALLED!", domain) domain_helper.raise_on(domain) - # "rel" identifiers (no real URLs) - nodeinfo_identifier = [ - "https://nodeinfo.diaspora.software/ns/schema/2.1", - 
"http://nodeinfo.diaspora.software/ns/schema/2.1", - "https://nodeinfo.diaspora.software/ns/schema/2.0", - "http://nodeinfo.diaspora.software/ns/schema/2.0", - "https://nodeinfo.diaspora.software/ns/schema/1.1", - "http://nodeinfo.diaspora.software/ns/schema/1.1", - "https://nodeinfo.diaspora.software/ns/schema/1.0", - "http://nodeinfo.diaspora.software/ns/schema/1.0", - ] - # No CSRF by default, you don't have to add network.api_headers by yourself here headers = tuple() @@ -157,29 +162,29 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: logger.debug("data[]='%s'", type(data)) if "error_message" not in data and "json" in data and len(data["json"]) > 0: - logger.debug("path='%s' returned valid json()=%d", path, len(data["json"])) + logger.debug("path='%s' returned valid json()=%d - BREAK!", path, len(data["json"])) break logger.debug("data[]='%s'", type(data)) if "exception" in data: - logger.warning("domain='%s' returned exception '%s'", domain, str(data["exception"])) + logger.warning("domain='%s' returned exception '%s' - RAISE!", domain, str(data["exception"])) raise data["exception"] elif "error_message" in data: - logger.warning("domain='%s' returned error message: '%s'", domain, data["error_message"]) + logger.warning("domain='%s' returned error message: '%s' - EXIT!", domain, data["error_message"]) return data elif "json" not in data: - logger.warning("domain='%s' returned no 'json' key", domain) + logger.warning("domain='%s' returned no 'json' key - EXIT!", domain) return dict() infos = data["json"] - logger.debug("infos()=%d has been returned", len(infos)) + logger.debug("infos(%d)[]='%s' has been returned", len(infos), type(infos)) if "links" in infos: logger.debug("Marking domain='%s' as successfully handled ...", domain) instances.set_success(domain) logger.debug("Found infos[links]()=%d record(s),", len(infos["links"])) - for niid in nodeinfo_identifier: + for niid in _nodeinfo_identifier: data = dict() logger.debug("Checking niid='%s' ...", 
niid) @@ -231,7 +236,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: instances.set_detection_mode(domain, "AUTO_DISCOVERY") instances.set_nodeinfo_url(domain, link["href"]) - logger.debug("Marking domain='%s' as successfully handled ...", domain) + logger.debug("Marking domain='%s' as successfully handled - BREAK!", domain) instances.set_success(domain) break else: @@ -240,7 +245,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: logger.debug("data()=%d", len(data)) if "error_message" not in data and "json" in data: - logger.debug("Auto-discovery successful: domain='%s'", domain) + logger.debug("Auto-discovery successful: domain='%s' - BREAK!", domain) break elif "server" in infos: logger.debug("Found infos[server][software]='%s'", infos["server"]["software"]) diff --git a/fba/models/blocks.py b/fba/models/blocks.py index 1b61887..70e6eb3 100644 --- a/fba/models/blocks.py +++ b/fba/models/blocks.py @@ -144,7 +144,9 @@ def add(blocker: str, blocked: str, reason: str, block_level: str): if reason is not None: # Maybe needs cleaning + logger.debug("reason='%s' - BEFORE!", reason) reason = tidyup.reason(reason) + logger.debug("reason='%s' - AFTER!", reason) logger.info("New block: blocker='%s',blocked='%s',reason='%s',block_level='%s'", blocker, blocked, reason, block_level) -- 2.39.5