From: Roland Häder Date: Fri, 26 May 2023 04:41:53 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=059a2b26540fba6415492894939da7754df9d928;p=fba.git Continued: - also strip out " by " and " see " (self-advertisement) - same with " version" - some version numbers had uncommon long patch levels, e.g. 8.0.0000 --- diff --git a/fba.py b/fba.py index 08e489f..14caf61 100644 --- a/fba.py +++ b/fba.py @@ -92,9 +92,9 @@ cursor = connection.cursor() # Pattern instance for version numbers patterns = [ # semantic version number (with v|V) prefix) - re.compile("^(?Pv|V{0,1})(\.{0,1})(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)(\.(?P0|[1-9]\d*)(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"), + re.compile("^(?Pv|V{0,1})(\.{0,1})(?P0|[1-9]\d*)\.(?P0+|[1-9]\d*)(\.(?P0+|[1-9]\d*)(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"), # non-sematic, e.g. 1.2.3.4 - re.compile("^(?Pv|V{0,1})(\.{0,1})(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)(\.(?P0|[1-9]\d*)(\.(?P0|[1-9]\d*))?)$"), + re.compile("^(?Pv|V{0,1})(\.{0,1})(?P0|[1-9]\d*)\.(?P0+|[1-9]\d*)(\.(?P0+|[1-9]\d*)(\.(?P0|[1-9]\d*))?)$"), # non-sematic, e.g. 2023-05 re.compile("^(?P[1-9]{1}[0-9]{3})\.(?P[0-9]{2})$") ] @@ -136,13 +136,19 @@ def remove_version(software: str) -> str: # NOISY-DEBUG: print(f"DEBUG: end[{type(end)}]={end}") software = software[0:end].strip() + if " version" in software: + # NOISY-DEBUG: print(f"DEBUG: software='{software}' contains word ' version'") + software = strip_until(software, " version") # NOISY-DEBUG: print(f"DEBUG: software='{software}' - EXIT!") return software def strip_powered_by(software: str) -> str: # NOISY-DEBUG: print(f"DEBUG: software='{software}' - CALLED!") - if not "powered by" in software: + if software == "": + print(f"ERROR: Bad method call, 'software' is empty") + raise Exception("Parameter 'software' is empty") + elif not "powered by" in software: print(f"WARNING: Cannot find 'powered by' in '{software}'!") return software @@ -152,8 +158,26 @@ def strip_powered_by(software: str) -> str: software = software[start + 11:].strip() # NOISY-DEBUG: print(f"DEBUG: software='{software}'") - # Next, strip of ' - ' part - end = software.find(" - ") + software = strip_until(software, " - ") + + # NOISY-DEBUG: print(f"DEBUG: software='{software}' - EXIT!") + return software + +def strip_until(software: str, until: str) -> str: + # NOISY-DEBUG: print(f"DEBUG: software='{software}',until='{until}' - CALLED!") + if software == "": + print(f"ERROR: Bad method call, 'software' is empty") + raise Exception("Parameter 'software' is empty") + elif until == "": + print(f"ERROR: Bad method call, 'until' is empty") + raise Exception("Parameter 'until' is empty") + elif not until in software: + print(f"WARNING: Cannot find 'powered by' in '{software}'!") + return software + + # Next, strip until part + end = software.find(until) + # NOISY-DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'") if end > 0: software = software[0:end].strip() @@ -527,6 +551,12 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: if type(software) is str and "powered by" in software: # NOISY-DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it") software = remove_version(strip_powered_by(software)) + elif type(software) is str and " by " in software: + # NOISY-DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it") + software = strip_until(software, " by ") + elif type(software) is str and " see " in software: + # NOISY-DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it") + software = strip_until(software, " see ") # NOISY-DEBUG: print(f"DEBUG: software='{software}' - EXIT!") return software @@ -574,8 +604,14 @@ def determine_software(domain: str) -> str: print("WARNING: Spliting of pipe:", software) software = tidyup(software.split("|")[0]); elif "powered by" in software: - print(f"DEBUG: software='{software}' has 'powered by' in it") + # NOISY-DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it") software = strip_powered_by(software) + elif type(software) is str and " by " in software: + # NOISY-DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it") + software = strip_until(software, " by ") + elif type(software) is str and " see " in software: + # NOISY-DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it") + software = strip_until(software, " see ") # NOISY-DEBUG: print(f"DEBUG: software[]={type(software)}") if software == "":