From f4bea85f7bf4ff04a7fa1096688e7dcea0e30b97 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Thu, 1 Jun 2023 22:14:27 +0200 Subject: [PATCH] Continued: - moved all updates of columns in 'instances' table to central function update_nodeinfos(), other functions are just wrappers to fill proper array elements - also save total found row count --- blocks_empty.db | Bin 32768 -> 32768 bytes fba.py | 220 ++++++++++++++++++++++++++++++--------------- fetch_instances.py | 28 +++--- 3 files changed, 165 insertions(+), 83 deletions(-) diff --git a/blocks_empty.db b/blocks_empty.db index 1854e2ec34f86c7f766076dd243a424f0d3854e2..9e4a4a22ce5f88f22410505f3226faed62ac8f5e 100644 GIT binary patch delta 136 zcmZo@U}|V!njkIsje&uI1BhY3a-xp0^fv}Q?}@zJn;6*H4>0h}<$cF~fa^EcbS@?S zsT&*J*tuHb1=z*qSjTKO&lzI%`Ar|Hh$Wy#5IADtHn})U0hzCvBPomDRx69M$OIdc!i1pWgQt8 diff --git a/fba.py b/fba.py index 1fa0ee7..7a738d8 100644 --- a/fba.py +++ b/fba.py @@ -77,9 +77,23 @@ api_headers = { nodeinfos = { # Detection mode: 'AUTO_DISCOVERY', 'STATIC_CHECKS' or 'GENERATOR' # NULL means all detection methods have failed (maybe still reachable instance) - "detection_mode": {}, + "detection_mode" : {}, # Found nodeinfo URL - "nodeinfo_url": {}, + "nodeinfo_url" : {}, + # Found total peers + "total_peers" : {}, + # Last fetched instances + "last_instance_fetch": {}, + # Last updated + "last_updated" : {}, + # Last blocked + "last_blocked" : {}, + # Last nodeinfo (fetched) + "last_nodeinfo" : {}, + # Last status code + "last_status_code" : {}, + # Last error details + "last_error_details" : {}, } language_mapping = { @@ -303,6 +317,9 @@ def strip_until(software: str, until: str) -> str: return software def is_blacklisted(domain: str) -> bool: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + blacklisted = False for peer in blacklist: if peer in domain: @@ -311,6 +328,9 @@ def is_blacklisted(domain: str) -> bool: return blacklisted def remove_pending_error(domain: str): + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + try: # Prevent updating any pending errors, nodeinfo was found del pending_errors[domain] @@ -319,28 +339,30 @@ def remove_pending_error(domain: str): pass def get_hash(domain: str) -> str: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + return hashlib.sha256(domain.encode("utf-8")).hexdigest() def update_last_blocked(domain: str): - # DEBUG: print("DEBUG: Updating last_blocked for domain", domain) - try: - cursor.execute("UPDATE instances SET last_blocked = ?, last_updated = ? WHERE domain = ? LIMIT 1", [ - time.time(), - time.time(), - domain - ]) + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") - if cursor.rowcount == 0: - print("WARNING: Did not update any rows:", domain) + # DEBUG: print("DEBUG: Updating last_blocked for domain", domain) + nodeinfos["last_blocked"] = time.time() + nodeinfos["last_updated"] = time.time() - except BaseException as e: - print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'") - sys.exit(255) + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_nodeinfos({domain}) ...") + update_nodeinfos(domain) # DEBUG: print("DEBUG: EXIT!") def has_pending_nodeinfos(domain: str) -> bool: # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + has_pending = False for key in nodeinfos: # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',nodeinfos[key]='{nodeinfos[key]}'") @@ -352,7 +374,10 @@ def has_pending_nodeinfos(domain: str) -> bool: return has_pending def update_nodeinfos(domain: str): - # DEBUG: print("DEBUG: Updating nodeinfo for domain:", domain) + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + + # DEBUG: print(f"DEBUG: Updating nodeinfo for domain='{domain}' ...") sql_string = '' fields = list() for key in nodeinfos: @@ -363,8 +388,11 @@ def update_nodeinfos(domain: str): sql_string += f" {key} = ?," fields.append(domain) - # DEBUG: print(f"DEBUG: sql_string='{sql_string}',fields()={len(fields)}") + if sql_string == '': + raise ValueError(f"WARNING: No fields have been set, but method invoked, domain='{domain}'") + + # DEBUG: print(f"DEBUG: sql_string='{sql_string}',fields()={len(fields)}") sql = "UPDATE instances SET" + sql_string + " last_status_code = NULL, last_error_details = NULL WHERE domain = ? LIMIT 1" # DEBUG: print("DEBUG: sql:", sql) @@ -375,6 +403,8 @@ def update_nodeinfos(domain: str): if cursor.rowcount == 0: print("WARNING: Did not update any rows:", domain) + else: + connection.commit() except BaseException as e: print(f"ERROR: failed SQL query: domain='{domain}',sql='{sql}',exception[{type(e)}]:'{str(e)}'") @@ -392,6 +422,9 @@ def update_nodeinfos(domain: str): def log_error(domain: str, res: any): # DEBUG: print("DEBUG: domain,res[]:", domain, type(res)) + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + try: # DEBUG: print("DEBUG: BEFORE res[]:", type(res)) if isinstance(res, BaseException) or isinstance(res, json.JSONDecodeError): @@ -423,79 +456,66 @@ def log_error(domain: str, res: any): def update_last_error(domain: str, res: any): # DEBUG: print("DEBUG: domain,res[]:", domain, type(res)) - try: - # DEBUG: print("DEBUG: BEFORE res[]:", type(res)) - if isinstance(res, BaseException) or isinstance(res, json.JSONDecodeError): - res = str(res) - - # DEBUG: print("DEBUG: AFTER res[]:", type(res)) - if type(res) is str: - # DEBUG: print(f"DEBUG: Setting last_error_details='{res}'"); - cursor.execute("UPDATE instances SET last_status_code = 999, last_error_details = ?, last_updated = ? WHERE domain = ? LIMIT 1", [ - res, - time.time(), - domain - ]) - else: - # DEBUG: print(f"DEBUG: Setting last_error_details='{res.reason}'"); - cursor.execute("UPDATE instances SET last_status_code = ?, last_error_details = ?, last_updated = ? WHERE domain = ? LIMIT 1", [ - res.status_code, - res.reason, - time.time(), - domain - ]) - - if cursor.rowcount == 0: - # DEBUG: print("DEBUG: Did not update any rows:", domain) - pending_errors[domain] = res + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + + # DEBUG: print("DEBUG: BEFORE res[]:", type(res)) + if isinstance(res, BaseException) or isinstance(res, json.JSONDecodeError): + res = str(res) + + # DEBUG: print("DEBUG: AFTER res[]:", type(res)) + nodeinfos["last_updated"][domain] = time.time() + if type(res) is str: + # DEBUG: print(f"DEBUG: Setting last_error_details='{res}'"); + nodeinfos["last_status_code"][domain] = 999 + nodeinfos["last_error_details"][domain] = res + else: + # DEBUG: print(f"DEBUG: Setting last_error_details='{res.reason}'"); + nodeinfos["last_status_code"][domain] = res.status_code + nodeinfos["last_error_details"][domain] = res.reason - log_error(domain, res) + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_nodeinfos({domain}) ...") + update_nodeinfos(domain) - except BaseException as e: - print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'") - sys.exit(255) + log_error(domain, res) # DEBUG: print("DEBUG: EXIT!") def update_last_instance_fetch(domain: str): - # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain) - try: - cursor.execute("UPDATE instances SET last_instance_fetch = ?, last_updated = ? WHERE domain = ? LIMIT 1", [ - time.time(), - time.time(), - domain - ]) + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") - if cursor.rowcount == 0: - print("WARNING: Did not update any rows:", domain) + # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain) + nodeinfos["last_instance_fetch"][domain] = time.time() + nodeinfos["last_updated"][domain] = time.time() - except BaseException as e: - print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'") - sys.exit(255) + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_nodeinfos({domain}) ...") + update_nodeinfos(domain) - connection.commit() # DEBUG: print("DEBUG: EXIT!") def update_last_nodeinfo(domain: str): - # DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain) - try: - cursor.execute("UPDATE instances SET last_nodeinfo = ?, last_updated = ? WHERE domain = ? LIMIT 1", [ - time.time(), - time.time(), - domain - ]) + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") - if cursor.rowcount == 0: - print("WARNING: Did not update any rows:", domain) + # DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain) + nodeinfos["last_nodeinfo"][domain] = time.time() + nodeinfos["last_updated"][domain] = time.time() - except BaseException as e: - print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'") - sys.exit(255) + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_nodeinfos({domain}) ...") + update_nodeinfos(domain) - connection.commit() # DEBUG: print("DEBUG: EXIT!") def get_peers(domain: str, software: str) -> list: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif type(software) != str and software != None: + raise ValueError(f"WARNING: software[]={type(software)} is not 'str'") + # DEBUG: print(f"DEBUG: domain='{domain}',software='{software}' - CALLED!") peers = list() @@ -546,6 +566,9 @@ def get_peers(domain: str, software: str) -> list: if not "host" in row: print(f"WARNING: row()={len(row)} does not contain element 'host': {row},domain='{domain}'") continue + elif type(row["host"]) != str: + print(f"WARNING: row[host][]={type(row['host'])} is not 'str'") + continue elif is_blacklisted(row["host"]): # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}'") continue @@ -553,6 +576,9 @@ def get_peers(domain: str, software: str) -> list: # DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'") peers.append(row["host"]) + # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") + nodeinfos["total_peers"][domain] = len(peers) + # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -582,6 +608,9 @@ def get_peers(domain: str, software: str) -> list: except BaseException as e: print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'") + # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") + nodeinfos["total_peers"][domain] = len(peers) + # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -621,6 +650,9 @@ def get_peers(domain: str, software: str) -> list: except BaseException as e: print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'") + # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") + nodeinfos["total_peers"][domain] = len(peers) + # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -660,6 +692,9 @@ def get_peers(domain: str, software: str) -> list: print("WARNING: Some error during get():", domain, e) update_last_error(domain, e) + # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") + nodeinfos["total_peers"][domain] = len(peers) + # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -667,6 +702,13 @@ def get_peers(domain: str, software: str) -> list: return peers def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = {}) -> dict: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif type(path) != str: + raise ValueError(f"WARNING: path[]={type(path)} is not 'str'") + elif type(parameter) != str: + raise ValueError(f"WARNING: parameter[]={type(parameter)} is not 'str'") + # DEBUG: print("DEBUG: Sending POST to domain,path,parameter:", domain, path, parameter, extra_headers) data = {} try: @@ -685,6 +727,9 @@ def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = return data def fetch_nodeinfo(domain: str, path: str = None) -> list: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + # DEBUG: print("DEBUG: Fetching nodeinfo from domain,path:", domain, path) nodeinfo = fetch_wellknown_nodeinfo(domain) @@ -737,6 +782,9 @@ def fetch_nodeinfo(domain: str, path: str = None) -> list: return data def fetch_wellknown_nodeinfo(domain: str) -> list: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain) data = {} @@ -777,6 +825,11 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: return data def fetch_generator_from_path(domain: str, path: str = "/") -> str: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif type(path) != str: + raise ValueError(f"WARNING: path[]={type(path)} is not 'str'") + # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!") software = None @@ -838,6 +891,9 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: return software def determine_software(domain: str, path: str = None) -> str: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + # DEBUG: print("DEBUG: Determining software for domain,path:", domain, path) software = None @@ -990,6 +1046,9 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str): # DEBUG: print("DEBUG: EXIT!") def is_instance_registered(domain: str) -> bool: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + # NOISY-DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if not is_cache_initialized("is_registered"): # NOISY-DEBUG: print(f"DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...") @@ -1009,6 +1068,13 @@ def is_instance_registered(domain: str) -> bool: return registered def add_instance(domain: str, origin: str, originator: str, path: str = None): + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif type(origin) != str and origin != None: + raise ValueError(f"WARNING: origin[]={type(origin)} is not 'str'") + elif type(originator) != str: + raise ValueError(f"WARNING: originator[]={type(originator)} is not 'str'") + # DEBUG: print("DEBUG: domain,origin,originator,path:", domain, origin, originator, path) if not validators.domain(domain.split("/")[0]): print("WARNING: Bad domain name:", domain) @@ -1091,6 +1157,9 @@ def send_bot_post(instance: str, blocks: dict): return True def get_mastodon_blocks(domain: str) -> dict: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain) blocks = { "Suspended servers": [], @@ -1135,6 +1204,9 @@ def get_mastodon_blocks(domain: str) -> dict: } def get_friendica_blocks(domain: str) -> dict: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + # DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain) blocks = [] @@ -1168,6 +1240,9 @@ def get_friendica_blocks(domain: str) -> dict: } def get_misskey_blocks(domain: str) -> dict: + if type(domain) != str: + raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + # DEBUG: print("DEBUG: Fetching misskey blocks from domain:", domain) blocks = { "suspended": [], @@ -1283,6 +1358,9 @@ def get_misskey_blocks(domain: str) -> dict: } def tidyup(string: str) -> str: + if type(string) != str: + raise ValueError(f"WARNING: string[]={type(string)} is not expected") + # some retards put their blocks in variable case string = string.lower().strip() diff --git a/fetch_instances.py b/fetch_instances.py index 63a8fa7..fd217ef 100755 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -25,26 +25,30 @@ import validators import fba def fetch_instances(domain: str, origin: str, software: str, path: str = None): - # NOISY-DEBUG: print("DEBUG: domain,origin,software,path:", domain, origin, software, path) + # DEBUG: print("DEBUG: domain,origin,software,path:", domain, origin, software, path) if not fba.is_instance_registered(domain): - # NOISY-DEBUG: print("DEBUG: Adding new domain:", domain, origin) + # DEBUG: print("DEBUG: Adding new domain:", domain, origin) fba.add_instance(domain, origin, sys.argv[0], path) - # NOISY-DEBUG: print("DEBUG: Fetching instances for domain:", domain, software) + # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software) peerlist = fba.get_peers(domain, software) if (peerlist is None): print("ERROR: Cannot fetch peers:", domain) return elif fba.has_pending_nodeinfos(domain): - # NOISY-DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...") + # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...") fba.update_nodeinfos(domain) print(f"INFO: Checking {len(peerlist)} instances from {domain} ...") for instance in peerlist: - # NOISY-DEBUG: print("DEBUG: BEFORE instance:", instance) + if peerlist == None: + # Skip "None" types as tidup() cannot parse them + continue + + # DEBUG: print(f"DEBUG: instance[{type(instance}]={instance} - BEFORE") instance = fba.tidyup(instance) - # NOISY-DEBUG: print("DEBUG: AFTER instance:", instance) + # DEBUG: print(f"DEBUG: instance[{type(instance}]={instance} - AFTER") if instance == "": print("WARNING: Empty instance after tidyup(), domain:", domain) @@ -53,16 +57,16 @@ def fetch_instances(domain: str, origin: str, software: str, path: str = None): print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'") continue elif fba.is_blacklisted(instance): - # NOISY-DEBUG: print("DEBUG: instance is blacklisted:", instance) + # DEBUG: print("DEBUG: instance is blacklisted:", instance) continue - # NOISY-DEBUG: print("DEBUG: Handling instance:", instance) + # DEBUG: print("DEBUG: Handling instance:", instance) try: if not fba.is_instance_registered(instance): - # NOISY-DEBUG: print("DEBUG: Adding new instance:", instance, domain) + # DEBUG: print("DEBUG: Adding new instance:", instance, domain) fba.add_instance(instance, domain, sys.argv[0]) except BaseException as e: - print(f"ERROR: instance='{instance}',exception:'{str(e)}'") + print(f"ERROR: instance='{instance}',exception[{type(e)}]:'{str(e)}'") continue instance = sys.argv[1] @@ -78,12 +82,12 @@ fba.cursor.execute( rows = fba.cursor.fetchall() print(f"INFO: Checking {len(rows)} entries ...") for row in rows: - # NOISY-DEBUG: print("DEBUG: domain:", row[0]) + # DEBUG: print("DEBUG: domain:", row[0]) if fba.is_blacklisted(row[0]): print("WARNING: domain is blacklisted:", row[0]) continue - print(f"INFO: Fetching instances for instance '{row[0]}'('{row[2]}') of origin '{row[1]}',nodeinfo_url='{row[3]}'") + print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin '{row[1]}',nodeinfo_url='{row[3]}'") fetch_instances(row[0], row[1], row[2], row[3]) fba.connection.close() -- 2.39.5