From 13ec61897653c91a12b160bbae44ea52997650c6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sun, 4 Jun 2023 00:35:37 +0200 Subject: [PATCH] Continued: - moved instance-related functions and module-wide variables to 'instances.py' --- fba/__init__.py | 2 +- fba/fba.py | 183 +++++++++-------------------------------------- fba/instances.py | 132 ++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 149 deletions(-) create mode 100644 fba/instances.py diff --git a/fba/__init__.py b/fba/__init__.py index 803e5ea..638446a 100644 --- a/fba/__init__.py +++ b/fba/__init__.py @@ -1 +1 @@ -__all__ = ['boot', 'cache', 'fba'] +__all__ = ['boot', 'cache', 'fba', 'instances'] diff --git a/fba/fba.py b/fba/fba.py index 228f98c..db002a9 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -15,7 +15,6 @@ # along with this program. If not, see . import bs4 -from fba import cache import hashlib import re import reqto @@ -25,6 +24,9 @@ import sys import time import validators +from fba import cache +from fba import instances + with open("config.json") as f: config = json.loads(f.read()) @@ -73,31 +75,6 @@ api_headers = { "Content-Type": "application/json", } -# Found info from node, such as nodeinfo URL, detection mode that needs to be -# written to database. 
Both arrays must be filled at the same time or else -# update_instance_data() will fail -instance_data = { - # Detection mode: 'AUTO_DISCOVERY', 'STATIC_CHECKS' or 'GENERATOR' - # NULL means all detection methods have failed (maybe still reachable instance) - "detection_mode" : {}, - # Found nodeinfo URL - "nodeinfo_url" : {}, - # Found total peers - "total_peers" : {}, - # Last fetched instances - "last_instance_fetch": {}, - # Last updated - "last_updated" : {}, - # Last blocked - "last_blocked" : {}, - # Last nodeinfo (fetched) - "last_nodeinfo" : {}, - # Last status code - "last_status_code" : {}, - # Last error details - "last_error_details" : {}, -} - language_mapping = { # English -> English "Silenced instances" : "Silenced servers", @@ -171,9 +148,9 @@ def fetch_instances(domain: str, origin: str, software: str, script: str, path: if (peerlist is None): print("ERROR: Cannot fetch peers:", domain) return - elif has_pending_instance_data(domain): + elif instances.has_pending_instance_data(domain): # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...") - update_instance_data(domain) + instances.update_instance_data(domain) print(f"INFO: Checking {len(peerlist)} instances from {domain} ...") for instance in peerlist: @@ -206,26 +183,6 @@ def fetch_instances(domain: str, origin: str, software: str, script: str, path: # DEBUG: print("DEBUG: EXIT!") -def set_instance_data(key: str, domain: str, value: any): - # NOISY-DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!") - if type(key) != str: - raise ValueError("Parameter key[]='{type(key)}' is not 'str'") - elif key == "": - raise ValueError(f"Parameter 'key' cannot be empty") - elif type(domain) != str: - raise ValueError("Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError(f"Parameter 'domain' cannot be empty") - elif not key in instance_data: - raise ValueError(f"key='{key}' not found in instance_data") - elif 
not is_primitive(value): - raise ValueError(f"value[]='{type(value)}' is not a primitive type") - - # Set it - instance_data[key][domain] = value - - # DEBUG: print("DEBUG: EXIT!") - def add_peers(rows: dict) -> list: # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!") peers = list() @@ -407,81 +364,11 @@ def update_last_blocked(domain: str): raise ValueError(f"Parameter 'domain' cannot be empty") # DEBUG: print("DEBUG: Updating last_blocked for domain", domain) - set_instance_data("last_blocked", domain, time.time()) + instances.set_instance_data("last_blocked", domain, time.time()) # Running pending updated - # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") - update_instance_data(domain) - - # DEBUG: print("DEBUG: EXIT!") - -def has_pending_instance_data(domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if type(domain) != str: - raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") - elif domain == "": - raise ValueError(f"Parameter 'domain' cannot be empty") - - has_pending = False - for key in instance_data: - # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',instance_data[key]()='{len(instance_data[key])}'") - if domain in instance_data[key]: - has_pending = True - break - - # DEBUG: print(f"DEBUG: has_pending='{has_pending}' - EXIT!") - return has_pending - -def update_instance_data(domain: str): - # DEBUG: print(f"DEBUG: domain={domain} - CALLED!") - if type(domain) != str: - raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") - elif domain == "": - raise ValueError(f"Parameter 'domain' cannot be empty") - elif not has_pending_instance_data(domain): - raise Exception(f"Domain '{domain}' has no pending instance data, but function invoked") - - # DEBUG: print(f"DEBUG: Updating nodeinfo for domain='{domain}' ...") - sql_string = '' - fields = list() - for key in instance_data: - # DEBUG: print("DEBUG: key:", key) - if domain in instance_data[key]: - # DEBUG: print(f"DEBUG: 
Adding '{instance_data[key][domain]}' for key='{key}' ...") - fields.append(instance_data[key][domain]) - sql_string += f" {key} = ?," - - fields.append(domain) - - if sql_string == '': - raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'") - - # DEBUG: print(f"DEBUG: sql_string='{sql_string}',fields()={len(fields)}") - sql_string = "UPDATE instances SET" + sql_string + " last_updated = TIME() WHERE domain = ? LIMIT 1" - # DEBUG: print("DEBUG: sql_string:", sql_string) - - try: - # DEBUG: print("DEBUG: Executing SQL:", sql_string) - cursor.execute(sql_string, fields) - - # DEBUG: print(f"DEBUG: Success! (rowcount={cursor.rowcount })") - if cursor.rowcount == 0: - print(f"WARNING: Did not update any rows: domain='{domain}',fields()={len(fields)} - EXIT!") - return - - connection.commit() - - # DEBUG: print("DEBUG: Deleting instance_data for domain:", domain) - for key in instance_data: - try: - # DEBUG: print("DEBUG: Deleting key:", key) - del instance_data[key][domain] - except: - pass - - except BaseException as e: - print(f"ERROR: failed SQL query: domain='{domain}',sql_string='{sql_string}',exception[{type(e)}]:'{str(e)}'") - sys.exit(255) + # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...") + instances.update_instance_data(domain) # DEBUG: print("DEBUG: EXIT!") @@ -535,16 +422,16 @@ def update_last_error(domain: str, res: any): # DEBUG: print("DEBUG: AFTER res[]:", type(res)) if type(res) is str: # DEBUG: print(f"DEBUG: Setting last_error_details='{res}'"); - set_instance_data("last_status_code" , domain, 999) - set_instance_data("last_error_details", domain, res) + instances.set_instance_data("last_status_code" , domain, 999) + instances.set_instance_data("last_error_details", domain, res) else: # DEBUG: print(f"DEBUG: Setting last_error_details='{res.reason}'"); - set_instance_data("last_status_code" , domain, res.status_code) - set_instance_data("last_error_details", domain, res.reason) + 
instances.set_instance_data("last_status_code" , domain, res.status_code) + instances.set_instance_data("last_error_details", domain, res.reason) # Running pending updated - # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") - update_instance_data(domain) + # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...") + instances.update_instance_data(domain) log_error(domain, res) @@ -558,11 +445,11 @@ def update_last_instance_fetch(domain: str): raise ValueError(f"Parameter 'domain' cannot be empty") # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain) - set_instance_data("last_instance_fetch", domain, time.time()) + instances.set_instance_data("last_instance_fetch", domain, time.time()) # Running pending updated - # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") - update_instance_data(domain) + # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...") + instances.update_instance_data(domain) # DEBUG: print("DEBUG: EXIT!") @@ -574,12 +461,12 @@ def update_last_nodeinfo(domain: str): raise ValueError(f"Parameter 'domain' cannot be empty") # DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain) - set_instance_data("last_nodeinfo", domain, time.time()) - set_instance_data("last_updated" , domain, time.time()) + instances.set_instance_data("last_nodeinfo", domain, time.time()) + instances.set_instance_data("last_updated" , domain, time.time()) # Running pending updated - # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") - update_instance_data(domain) + # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...") + instances.update_instance_data(domain) # DEBUG: print("DEBUG: EXIT!") @@ -653,7 +540,7 @@ def get_peers(domain: str, software: str) -> list: peers.append(row["host"]) # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - set_instance_data("total_peers", domain, len(peers)) + 
instances.set_instance_data("total_peers", domain, len(peers)) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -685,7 +572,7 @@ def get_peers(domain: str, software: str) -> list: print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'") # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - set_instance_data("total_peers", domain, len(peers)) + instances.set_instance_data("total_peers", domain, len(peers)) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -727,7 +614,7 @@ def get_peers(domain: str, software: str) -> list: print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'") # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - set_instance_data("total_peers", domain, len(peers)) + instances.set_instance_data("total_peers", domain, len(peers)) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -769,7 +656,7 @@ def get_peers(domain: str, software: str) -> list: update_last_error(domain, e) # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - set_instance_data("total_peers", domain, len(peers)) + instances.set_instance_data("total_peers", domain, len(peers)) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") update_last_instance_fetch(domain) @@ -846,8 +733,8 @@ def fetch_nodeinfo(domain: str, path: str = None) -> list: # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code},data[]='{type(data)}'") if res.ok and isinstance(data, dict): # DEBUG: print("DEBUG: Success:", request) - set_instance_data("detection_mode", domain, "STATIC_CHECK") - set_instance_data("nodeinfo_url" , domain, request) + instances.set_instance_data("detection_mode", domain, "STATIC_CHECK") + 
instances.set_instance_data("nodeinfo_url" , domain, request) break elif res.ok and isinstance(data, list): # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'") @@ -895,8 +782,8 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: # DEBUG: print("DEBUG: href,res.ok,res.status_code:", link["href"], res.ok, res.status_code) if res.ok and isinstance(data, dict): # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data)) - set_instance_data("detection_mode", domain, "AUTO_DISCOVERY") - set_instance_data("nodeinfo_url" , domain, link["href"]) + instances.set_instance_data("detection_mode", domain, "AUTO_DISCOVERY") + instances.set_instance_data("nodeinfo_url" , domain, link["href"]) break else: print("WARNING: Unknown 'rel' value:", domain, link["rel"]) @@ -943,13 +830,13 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: # DEBUG: print("DEBUG: Found generator meta tag:", domain) software = tidyup_domain(generator.get("content")) print(f"INFO: domain='{domain}' is generated by '{software}'") - set_instance_data("detection_mode", domain, "GENERATOR") + instances.set_instance_data("detection_mode", domain, "GENERATOR") remove_pending_error(domain) elif isinstance(site_name, bs4.element.Tag): # DEBUG: print("DEBUG: Found property=og:site_name:", domain) sofware = tidyup_domain(site_name.get("content")) print(f"INFO: domain='{domain}' has og:site_name='{software}'") - set_instance_data("detection_mode", domain, "SITE_NAME") + instances.set_instance_data("detection_mode", domain, "SITE_NAME") remove_pending_error(domain) except BaseException as e: @@ -1259,11 +1146,11 @@ def add_instance(domain: str, origin: str, originator: str, path: str = None): cache.set_cache_key("is_registered", domain, True) - if has_pending_instance_data(domain): + if instances.has_pending_instance_data(domain): # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...") - set_instance_data("last_status_code" , domain, None) - 
set_instance_data("last_error_details", domain, None) - update_instance_data(domain) + instances.set_instance_data("last_status_code" , domain, None) + instances.set_instance_data("last_error_details", domain, None) + instances.update_instance_data(domain) remove_pending_error(domain) if domain in pending_errors: diff --git a/fba/instances.py b/fba/instances.py new file mode 100644 index 0000000..9fa8581 --- /dev/null +++ b/fba/instances.py @@ -0,0 +1,132 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import sys + +# Found info from node, such as nodeinfo URL, detection mode that needs to be +# written to database. 
# Pending per-domain values that still have to be written to the `instances`
# table. Every top-level key is also used verbatim as the column name in the
# UPDATE statement built by update_instance_data(); every inner dict maps
# domain -> pending value.
instance_data = {
    # Detection mode: 'AUTO_DISCOVERY', 'STATIC_CHECK', 'GENERATOR' or 'SITE_NAME'
    # NULL means all detection methods have failed (maybe still reachable instance)
    "detection_mode"     : {},
    # Found nodeinfo URL
    "nodeinfo_url"       : {},
    # Found total peers
    "total_peers"        : {},
    # Last fetched instances
    "last_instance_fetch": {},
    # Last updated
    "last_updated"       : {},
    # Last blocked
    "last_blocked"       : {},
    # Last nodeinfo (fetched)
    "last_nodeinfo"      : {},
    # Last status code
    "last_status_code"   : {},
    # Last error details
    "last_error_details" : {},
}


def set_instance_data(key: str, domain: str, value: object):
    """Queue `value` under `key` for `domain` until it is flushed.

    The value stays in `instance_data` until update_instance_data() writes it
    to the database and removes it again.

    Raises:
        ValueError: if `key` or `domain` is not a non-empty string, if `key`
            is not one of the keys of `instance_data`, or if `value` is
            rejected by fba.is_primitive().
    """
    # NOISY-DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!")
    if not isinstance(key, str):
        raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'")
    elif key == "":
        raise ValueError("Parameter 'key' cannot be empty")
    elif not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif key not in instance_data:
        raise ValueError(f"key='{key}' not found in instance_data")

    # is_primitive() was not moved along with this function and is not defined
    # in this module. It is imported lazily because fba/fba.py imports this
    # module at its top level (avoids a circular import at load time).
    # NOTE(review): assumes is_primitive is a module-level name of fba/fba.py,
    # where this function used to call it unqualified - confirm.
    from fba import fba

    if not fba.is_primitive(value):
        raise ValueError(f"value[]='{type(value)}' is not a primitive type")

    # Set it
    instance_data[key][domain] = value

    # DEBUG: print("DEBUG: EXIT!")


def has_pending_instance_data(domain: str) -> bool:
    """Return True when any key of `instance_data` holds a value for `domain`.

    Raises:
        ValueError: if `domain` is not a non-empty string.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    has_pending = any(domain in pending for pending in instance_data.values())

    # DEBUG: print(f"DEBUG: has_pending='{has_pending}' - EXIT!")
    return has_pending


def update_instance_data(domain: str):
    """Write all pending values of `domain` to the `instances` table.

    Builds one UPDATE statement from every key of `instance_data` that holds a
    value for `domain`, executes and commits it, then drops the flushed
    values. When no row was updated, a warning is printed and the pending
    values are kept. Any failure while executing or committing is printed and
    terminates the process with exit code 255.

    Raises:
        ValueError: if `domain` is not a non-empty string.
        Exception: if nothing is pending for `domain`.
    """
    # DEBUG: print(f"DEBUG: domain={domain} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif not has_pending_instance_data(domain):
        raise Exception(f"Domain '{domain}' has no pending instance data, but function invoked")

    # The database handles were not moved along with this function. Imported
    # lazily because fba/fba.py imports this module at its top level.
    # NOTE(review): assumes cursor/connection are module-level names of
    # fba/fba.py, where this function used them unqualified - confirm.
    from fba import fba

    # DEBUG: print(f"DEBUG: Updating nodeinfo for domain='{domain}' ...")
    columns = [key for key in instance_data if domain in instance_data[key]]

    if not columns:
        raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'")

    # One placeholder per pending column, plus the domain for the WHERE clause
    fields = [instance_data[key][domain] for key in columns]
    fields.append(domain)

    assignments = "".join(f" {key} = ?," for key in columns)

    # DEBUG: print(f"DEBUG: assignments='{assignments}',fields()={len(fields)}")
    sql_string = "UPDATE instances SET" + assignments + " last_updated = TIME() WHERE domain = ? LIMIT 1"
    # DEBUG: print("DEBUG: sql_string:", sql_string)

    try:
        # DEBUG: print("DEBUG: Executing SQL:", sql_string)
        fba.cursor.execute(sql_string, fields)

        # DEBUG: print(f"DEBUG: Success! (rowcount={fba.cursor.rowcount })")
        if fba.cursor.rowcount == 0:
            print(f"WARNING: Did not update any rows: domain='{domain}',fields()={len(fields)} - EXIT!")
            return

        fba.connection.commit()

        # DEBUG: print("DEBUG: Deleting instance_data for domain:", domain)
        for key in instance_data:
            # Not every key has a value for this domain, so drop it if present
            instance_data[key].pop(domain, None)

    except BaseException as e:
        # Kept as BaseException so every failure still ends the process with
        # exit code 255, as before.
        print(f"ERROR: failed SQL query: domain='{domain}',sql_string='{sql_string}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print("DEBUG: EXIT!")