From 4b20e23ce0d5b998820b1855473171e9d16142c1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Fri, 2 Jun 2023 12:06:42 +0200 Subject: [PATCH] Continued: - renamed more variable/function - only log count, not whole array - some parameters cannot be an empty, too --- fba.py | 110 ++++++++++++++++++++++++++++++++------------- fetch_instances.py | 2 +- 2 files changed, 80 insertions(+), 32 deletions(-) diff --git a/fba.py b/fba.py index 91efa62..c8e56cb 100644 --- a/fba.py +++ b/fba.py @@ -74,7 +74,7 @@ api_headers = { # Found info from node, such as nodeinfo URL, detection mode that needs to be # written to database. Both arrays must be filled at the same time or else # update_instance_data() will fail -nodeinfos = { +instance_data = { # Detection mode: 'AUTO_DISCOVERY', 'STATIC_CHECKS' or 'GENERATOR' # NULL means all detection methods have failed (maybe still reachable instance) "detection_mode" : {}, @@ -319,6 +319,8 @@ def strip_until(software: str, until: str) -> str: def is_blacklisted(domain: str) -> bool: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") blacklisted = False for peer in blacklist: @@ -330,6 +332,8 @@ def is_blacklisted(domain: str) -> bool: def remove_pending_error(domain: str): if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") try: # Prevent updating any pending errors, nodeinfo was found @@ -341,15 +345,19 @@ def remove_pending_error(domain: str): def get_hash(domain: str) -> str: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") return hashlib.sha256(domain.encode("utf-8")).hexdigest() def update_last_blocked(domain: str): if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: Updating last_blocked for domain", domain) - nodeinfos["last_blocked"] = time.time() + instance_data["last_blocked"][domain] = time.time() # Running pending updated # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") @@ -357,15 +365,17 @@ def update_last_blocked(domain: str): # DEBUG: print("DEBUG: EXIT!") -def has_pending_nodeinfos(domain: str) -> bool: +def has_pending_instance_data(domain: str) -> bool: # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") has_pending = False - for key in nodeinfos: - # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',nodeinfos[key]='{nodeinfos[key]}'") - if domain in nodeinfos[key]: + for key in instance_data: + # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',instance_data[key]()='{len(instance_data[key])}'") + if domain in instance_data[key]: has_pending = True break @@ -375,15 +385,17 @@ def has_pending_nodeinfos(domain: str) -> bool: def update_instance_data(domain: str): if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print(f"DEBUG: Updating nodeinfo for domain='{domain}' ...") sql_string = '' fields = list() - for key in nodeinfos: + for key in instance_data: # DEBUG: print("DEBUG: key:", key) - if domain in nodeinfos[key]: - # DEBUG: print(f"DEBUG: Adding '{nodeinfos[key][domain]}' for key='{key}' ...") - fields.append(nodeinfos[key][domain]) + if domain in instance_data[key]: + # DEBUG: print(f"DEBUG: Adding '{instance_data[key][domain]}' for key='{key}' ...") + fields.append(instance_data[key][domain]) sql_string += f" {key} = ?," fields.append(domain) @@ -406,11 +418,11 @@ def update_instance_data(domain: str): connection.commit() - # DEBUG: print("DEBUG: Deleting nodeinfos for domain:", domain) - for key in nodeinfos: + # DEBUG: print("DEBUG: Deleting instance_data for domain:", domain) + for key in instance_data: try: # DEBUG: print("DEBUG: Deleting key:", key) - del nodeinfos[key][domain] + del instance_data[key][domain] except: pass @@ -424,6 +436,8 @@ def log_error(domain: str, res: any): # DEBUG: print("DEBUG: domain,res[]:", domain, type(res)) if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") try: # DEBUG: print("DEBUG: BEFORE res[]:", type(res)) @@ -458,6 +472,8 @@ def update_last_error(domain: str, res: any): # DEBUG: print("DEBUG: domain,res[]:", domain, type(res)) if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: BEFORE res[]:", type(res)) if isinstance(res, BaseException) or isinstance(res, json.JSONDecodeError): @@ -466,12 +482,12 @@ def update_last_error(domain: str, res: any): # DEBUG: print("DEBUG: AFTER res[]:", type(res)) if type(res) is str: # DEBUG: print(f"DEBUG: Setting last_error_details='{res}'"); - nodeinfos["last_status_code"][domain] = 999 - nodeinfos["last_error_details"][domain] = res + instance_data["last_status_code"][domain] = 999 + instance_data["last_error_details"][domain] = res else: # DEBUG: print(f"DEBUG: Setting last_error_details='{res.reason}'"); - nodeinfos["last_status_code"][domain] = res.status_code - nodeinfos["last_error_details"][domain] = res.reason + instance_data["last_status_code"][domain] = res.status_code + instance_data["last_error_details"][domain] = res.reason # Running pending updated # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") @@ -484,9 +500,11 @@ def update_last_error(domain: str, res: any): def update_last_instance_fetch(domain: str): if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain) - nodeinfos["last_instance_fetch"][domain] = time.time() + instance_data["last_instance_fetch"][domain] = time.time() # Running pending updated # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") @@ -497,10 +515,12 @@ def update_last_instance_fetch(domain: str): def update_last_nodeinfo(domain: str): if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain) - nodeinfos["last_nodeinfo"][domain] = time.time() - nodeinfos["last_updated"][domain] = time.time() + instance_data["last_nodeinfo"][domain] = time.time() + instance_data["last_updated"][domain] = time.time() # Running pending updated # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") @@ -511,6 +531,8 @@ def update_last_nodeinfo(domain: str): def get_peers(domain: str, software: str) -> list: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") elif type(software) != str and software != None: raise ValueError(f"WARNING: software[]={type(software)} is not 'str'") @@ -575,7 +597,7 @@ def get_peers(domain: str, software: str) -> list: peers.append(row["host"]) # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - nodeinfos["total_peers"][domain] = len(peers) + instance_data["total_peers"][domain] = len(peers) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -607,7 +629,7 @@ def get_peers(domain: str, software: str) -> list: print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'") # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - nodeinfos["total_peers"][domain] = len(peers) + instance_data["total_peers"][domain] = len(peers) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -649,7 +671,7 @@ def get_peers(domain: str, software: str) -> list: print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'") # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - nodeinfos["total_peers"][domain] = len(peers) + instance_data["total_peers"][domain] = len(peers) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -691,7 +713,7 @@ def get_peers(domain: str, software: str) -> list: update_last_error(domain, e) # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - nodeinfos["total_peers"][domain] = len(peers) + instance_data["total_peers"][domain] = len(peers) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -702,8 +724,12 @@ def get_peers(domain: str, software: str) -> list: def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = {}) -> dict: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") elif type(path) != str: raise ValueError(f"WARNING: path[]={type(path)} is not 'str'") + elif path == "": + raise ValueError(f"WARNING: path cannot be empty") elif type(parameter) != str: raise ValueError(f"WARNING: parameter[]={type(parameter)} is not 'str'") @@ -727,6 +753,8 @@ def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = def fetch_nodeinfo(domain: str, path: str = None) -> list: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: Fetching nodeinfo from domain,path:", domain, path) @@ -760,8 +788,8 @@ def fetch_nodeinfo(domain: str, path: str = None) -> list: # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code},data[]='{type(data)}'") if res.ok and isinstance(data, dict): # DEBUG: print("DEBUG: Success:", request) - nodeinfos["detection_mode"][domain] = "STATIC_CHECK" - nodeinfos["nodeinfo_url"][domain] = request + instance_data["detection_mode"][domain] = "STATIC_CHECK" + instance_data["nodeinfo_url"][domain] = request break elif res.ok and isinstance(data, list): # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'") @@ -782,6 +810,8 @@ def fetch_nodeinfo(domain: str, path: str = None) -> list: def fetch_wellknown_nodeinfo(domain: str) -> list: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain) data = {} @@ -806,8 +836,8 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: # DEBUG: print("DEBUG: href,res.ok,res.status_code:", link["href"], res.ok, res.status_code) if res.ok and isinstance(data, dict): # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data)) - nodeinfos["detection_mode"][domain] = "AUTO_DISCOVERY" - nodeinfos["nodeinfo_url"][domain] = link["href"] + instance_data["detection_mode"][domain] = "AUTO_DISCOVERY" + instance_data["nodeinfo_url"][domain] = link["href"] break else: print("WARNING: Unknown 'rel' value:", domain, link["rel"]) @@ -825,8 +855,12 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: def fetch_generator_from_path(domain: str, path: str = "/") -> str: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") elif type(path) != str: raise ValueError(f"WARNING: path[]={type(path)} is not 'str'") + elif path == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!") software = None @@ -849,13 +883,13 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: # DEBUG: print("DEBUG: Found generator meta tag:", domain) software = tidyup(generator.get("content")) print(f"INFO: domain='{domain}' is generated by '{software}'") - nodeinfos["detection_mode"][domain] = "GENERATOR" + instance_data["detection_mode"][domain] = "GENERATOR" remove_pending_error(domain) elif isinstance(site_name, bs4.element.Tag): # DEBUG: print("DEBUG: Found property=og:site_name:", domain) sofware = tidyup(site_name.get("content")) print(f"INFO: domain='{domain}' has og:site_name='{software}'") - nodeinfos["detection_mode"][domain] = "SITE_NAME" + instance_data["detection_mode"][domain] = "SITE_NAME" remove_pending_error(domain) except BaseException as e: @@ -891,6 +925,8 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: def determine_software(domain: str, path: str = None) -> str: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: Determining software for domain,path:", domain, path) software = None @@ -1046,6 +1082,8 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str): def is_instance_registered(domain: str) -> bool: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # NOISY-DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if not is_cache_initialized("is_registered"): @@ -1068,10 +1106,14 @@ def is_instance_registered(domain: str) -> bool: def add_instance(domain: str, origin: str, originator: str, path: str = None): if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") elif type(origin) != str and origin != None: raise ValueError(f"WARNING: origin[]={type(origin)} is not 'str'") elif type(originator) != str: raise ValueError(f"WARNING: originator[]={type(originator)} is not 'str'") + elif originator == "": + raise ValueError(f"WARNING: originator cannot be empty") # DEBUG: print("DEBUG: domain,origin,originator,path:", domain, origin, originator, path) if not validators.domain(domain.split("/")[0]): @@ -1100,7 +1142,7 @@ def add_instance(domain: str, origin: str, originator: str, path: str = None): set_cache_key("is_registered", domain, True) - if has_pending_nodeinfos(domain): + if has_pending_instance_data(domain): # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...") update_instance_data(domain) remove_pending_error(domain) @@ -1157,6 +1199,8 @@ def send_bot_post(instance: str, blocks: dict): def get_mastodon_blocks(domain: str) -> dict: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain) blocks = { @@ -1204,6 +1248,8 @@ def get_mastodon_blocks(domain: str) -> dict: def get_friendica_blocks(domain: str) -> dict: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain) blocks = [] @@ -1240,6 +1286,8 @@ def get_friendica_blocks(domain: str) -> dict: def get_misskey_blocks(domain: str) -> dict: if type(domain) != str: raise ValueError(f"WARNING: domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"WARNING: domain cannot be empty") # DEBUG: print("DEBUG: Fetching misskey blocks from domain:", domain) blocks = { diff --git a/fetch_instances.py b/fetch_instances.py index 2311e85..a0f89f9 100755 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -36,7 +36,7 @@ def fetch_instances(domain: str, origin: str, software: str, path: str = None): if (peerlist is None): print("ERROR: Cannot fetch peers:", domain) return - elif fba.has_pending_nodeinfos(domain): + elif fba.has_pending_instance_data(domain): # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...") fba.update_instance_data(domain) -- 2.39.5