From 68cf3aa7573343e9f9bd923b444b347b810fb1db Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 29 May 2023 01:55:24 +0200 Subject: [PATCH] Continued: - also fetch nodeinfo_url from database and hand it over so no redundant static checks will happen --- fba.py | 22 +++++++++++++--------- fetch_blocks.py | 27 ++++++++++++++++----------- fetch_instances.py | 12 ++++++------ 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/fba.py b/fba.py index c871b77..9174bba 100644 --- a/fba.py +++ b/fba.py @@ -555,13 +555,13 @@ def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = data = res.json() except BaseException as e: - print(f"WARNING: Some error during post(): domain='{domain}',path='{path}',parameter()={len(parameter)},exception:'{str(e)}'") + print(f"WARNING: Some error during post(): domain='{domain}',path='{path}',parameter()={len(parameter)},exception[{type(e)}]:'{str(e)}'") # DEBUG: print("DEBUG: Returning data():", len(data)) return data -def fetch_nodeinfo(domain: str) -> list: - # DEBUG: print("DEBUG: Fetching nodeinfo from domain:", domain) +def fetch_nodeinfo(domain: str, path: str = None) -> list: + # DEBUG: print("DEBUG: Fetching nodeinfo from domain,path:", domain, path) nodeinfo = fetch_wellknown_nodeinfo(domain) # DEBUG: print("DEBUG: nodeinfo:", len(nodeinfo)) @@ -581,6 +581,10 @@ def fetch_nodeinfo(domain: str) -> list: data = {} for request in requests: + if path != None and path != "" and request != path: + print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!") + continue + try: # DEBUG: print("DEBUG: Fetching request:", request) res = reqto.get(request, headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"])) @@ -697,12 +701,12 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: # DEBUG: print(f"DEBUG: software='{software}' - EXIT!") return software -def determine_software(domain: str) -> str: - # DEBUG: print("DEBUG: 
Determining software for domain:", domain) +def determine_software(domain: str, path: str = None) -> str: + # DEBUG: print("DEBUG: Determining software for domain,path:", domain, path) software = None # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...") - data = fetch_nodeinfo(domain) + data = fetch_nodeinfo(domain, path) # DEBUG: print("DEBUG: data[]:", type(data)) if not isinstance(data, dict) or len(data) == 0: @@ -864,8 +868,8 @@ def is_instance_registered(domain: str) -> bool: # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!") return registered -def add_instance(domain: str, origin: str, originator: str): - # DEBUG: print("DEBUG: domain,origin:", domain, origin, originator) +def add_instance(domain: str, origin: str, originator: str, path: str = None): + # DEBUG: print("DEBUG: domain,origin,originator,path:", domain, origin, originator, path) if not validators.domain(domain.split("/")[0]): print("WARNING: Bad domain name:", domain) raise @@ -873,7 +877,7 @@ def add_instance(domain: str, origin: str, originator: str): print("WARNING: Bad origin name:", origin) raise - software = determine_software(domain) + software = determine_software(domain, path) # DEBUG: print("DEBUG: Determined software:", software) print(f"INFO: Adding instance {domain} (origin: {origin})") diff --git a/fetch_blocks.py b/fetch_blocks.py index 2e0ff2d..bbccb93 100644 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -22,13 +22,13 @@ import re import fba fba.cursor.execute( - "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]] + "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) 
ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]] ) rows = fba.cursor.fetchall() print(f"INFO: Checking {len(rows)} entries ...") -for blocker, software in rows: - # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software:", blocker, software) +for blocker, software, origin, nodeinfo_url in rows: + # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) blockdict = [] blocker = fba.tidyup(blocker) # NOISY-DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) @@ -47,7 +47,7 @@ for blocker, software in rows: print("INFO: blocker:", blocker) try: # Blocks - json = fba.fetch_nodeinfo(blocker) + json = fba.fetch_nodeinfo(blocker, nodeinfo_url) if json is None: print("WARNING: Could not fetch nodeinfo from blocker:", blocker) continue @@ -86,18 +86,19 @@ for blocker, software in rows: if blocked.count("*") > 1: # -ACK!-oma also started obscuring domains without hash fba.cursor.execute( - "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] ) searchres = fba.cursor.fetchone() # NOISY-DEBUG: print("DEBUG: searchres[]:", type(searchres)) if searchres != None: blocked = searchres[0] + nodeinfo_url = searchres[1] # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked) # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked) if not fba.is_instance_registered(blocked): # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, argv[0]) + fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? 
LIMIT 1", @@ -249,7 +250,7 @@ for blocker, software in rows: if fba.cursor.fetchone() == None: # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, argv[0]) + fba.add_instance(blocked, blocker, origin) else: # Doing the hash search for instance names as well to tidy up DB fba.cursor.execute( @@ -332,16 +333,18 @@ for blocker, software in rows: if blocked.count("?") > 0: # Some obscure them with question marks, not sure if that's dependent on version or not fba.cursor.execute( - "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] ) searchres = fba.cursor.fetchone() if searchres != None: blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked) if not fba.is_instance_registered(blocked): # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker) + fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?", @@ -391,16 +394,18 @@ for blocker, software in rows: elif blocked.count("*") > 0: # GTS does not have hashes for obscured domains, so we have to guess it fba.cursor.execute( - "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] ) searchres = fba.cursor.fetchone() if searchres != None: blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] if not fba.is_instance_registered(blocked): # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker) + fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1", diff --git a/fetch_instances.py b/fetch_instances.py index 81deda0..c63803b 100644 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -21,11 +21,11 @@ import time import validators import fba -def fetch_instances(domain: str, origin: str, software: str): - # NOISY-DEBUG: print("DEBUG: domain,origin,software:", domain, origin, software) +def fetch_instances(domain: str, origin: str, software: str, path: str = None): + # NOISY-DEBUG: print("DEBUG: domain,origin,software,path:", domain, origin, software, path) if not fba.is_instance_registered(domain): # NOISY-DEBUG: print("DEBUG: Adding new domain:", domain, origin) - fba.add_instance(domain, origin, sys.argv[0]) + fba.add_instance(domain, origin, sys.argv[0], path) # NOISY-DEBUG: print("DEBUG: Fetching instances for domain:", domain, software) peerlist = fba.get_peers(domain, software) @@ -69,7 +69,7 @@ fetch_instances(instance, None, None) # Loop through some instances fba.cursor.execute( - "SELECT domain,origin,software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]] + "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) 
ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]] ) rows = fba.cursor.fetchall() @@ -80,7 +80,7 @@ for row in rows: print("WARNING: domain is blacklisted:", row[0]) continue - print(f"INFO: Fetching instances for instance '{row[0]}'('{row[2]}') of origin '{row[1]}'") - fetch_instances(row[0], row[1], row[2]) + print(f"INFO: Fetching instances for instance '{row[0]}'('{row[2]}') of origin '{row[1]}',nodeinfo_url='{row[3]}'") + fetch_instances(row[0], row[1], row[2], row[3]) fba.connection.close() -- 2.39.5