# Pattern instance for version numbers: an optional "v"/"V" prefix and an
# optional dot, then MAJOR.MINOR with an optional .PATCH[-prerelease][+build]
# tail. Numeric parts must not have leading zeros (so "05" is rejected).
# NOTE(review): the named-group names were missing in this copy of the file
# ("(?Pv|..." does not compile); they are restored here following the
# semver.org reference regex - confirm against the upstream repository.
pattern = re.compile(
    r"^(?P<version>v|V{0,1})(\.{0,1})"
    r"(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)"
    r"(\.(?P<patch>0|[1-9]\d*)"
    r"(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)"
    r"(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?"
    r"(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?"
    r")?$"
)

def remove_version(software: str) -> str:
    """Strip a trailing version number from a software name.

    The version candidate is whatever follows the last occurrence of the
    first separator found in ``software``, checked in this order: space,
    "/", "-". If the candidate matches ``pattern`` the name in front of that
    separator is returned with surrounding whitespace removed.

    Args:
        software: Software name that may end in a version number, e.g.
            "mastodon 4.1.2" or "Pleroma/2.5.1".

    Returns:
        The name without version and separator, or ``software`` unmodified
        (after printing a warning) when no version number can be identified.
    """
    # NOISY-DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    if "." not in software:
        # A bare `raise` here would fail with RuntimeError (no active
        # exception); warn and hand the input back unchanged instead.
        print(f"WARNING: software='{software}' does not contain a version number.")
        return software

    for separator in (" ", "/", "-"):
        if separator in software:
            # Split at the LAST separator: name in front, version candidate behind.
            name, _, version = software.rpartition(separator)
            break
    else:
        # No separator at all (e.g. "example.social"): there is nothing to
        # match against, so do not pass None into the regex.
        print(f"WARNING: software='{software}' has no separator in front of a version number, leaving it untouched.")
        return software

    # NOISY-DEBUG: print(f"DEBUG: version[{type(version)}]='{version}'")
    if pattern.match(version) is None:
        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
        return software

    # NOISY-DEBUG: print(f"DEBUG: Found valid version number: '{version}', removing it ...")
    # `name` already excludes the separator, so "Pleroma/2.5.1" becomes
    # "Pleroma" rather than "Pleroma/".
    software = name.strip()

    # NOISY-DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
bs4.BeautifulSoup(res.text, "html.parser") @@ -441,9 +475,17 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: update_last_error(domain, e) pass + # NOISY-DEBUG: print(f"DEBUG: software[]={type(software)}") + if type(software) is str and software == "": + software = None + elif type(software) is str and "." in software: + # NOISY-DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...") + software = remove_version(software) + # NOISY-DEBUG: print(f"DEBUG: software='{software}' - EXIT!") return software + def determine_software(domain: str) -> str: # NOISY-DEBUG: print("DEBUG: Determining software for domain:", domain) software = None @@ -496,6 +538,9 @@ def determine_software(domain: str) -> str: if str(software) == "": # NOISY-DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...") software = fetch_generator_from_path(domain) + elif len(str(software)) > 0 and "." in software: + # NOISY-DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...") + software = remove_version(software) # NOISY-DEBUG: print("DEBUG: Returning domain,software:", domain, software) return software diff --git a/fetch_instances.py b/fetch_instances.py index 9aebc35..20383cf 100644 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -53,8 +53,8 @@ def fetch_instances(domain: str, origin: str, software: str): fba.connection.commit() - except Exception as e: - print("ERROR:", e, instance) + except BaseException as e: + print(f"ERROR: instance='{instance}',exception:'{e}'") continue instance = sys.argv[1] -- 2.39.5