From 8d3e53de54a52744e2245e23ec357571aa8cccf2 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Roland=20H=C3=A4der?=
Date: Mon, 12 Jun 2023 03:51:23 +0200
Subject: [PATCH] Continued:

- moved version-related stuff to own helpers module
---
 fba/fba.py              | 133 ------------------------------------
 fba/federation.py       |  22 +++---
 fba/helpers/__init__.py |   1 +
 fba/helpers/version.py  | 148 ++++++++++++++++++++++++++++++++++++++++
 fba/networks/pleroma.py |   3 -
 5 files changed, 160 insertions(+), 147 deletions(-)
 create mode 100644 fba/helpers/version.py

diff --git a/fba/fba.py b/fba/fba.py
index 4ae3c51..9cd6b9c 100644
--- a/fba/fba.py
+++ b/fba/fba.py
@@ -14,7 +14,6 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 import hashlib
-import re
 import json
 import sqlite3
 import time
@@ -30,144 +29,12 @@ from fba import network
 connection = sqlite3.connect("blocks.db")
 cursor = connection.cursor()
 
-# Pattern instance for version numbers
-patterns = [
-    # semantic version number (with v|V) prefix)
-    re.compile("^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"),
-    # non-sematic, e.g. 1.2.3.4
-    re.compile("^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(\.(?P<subpatch>0|[1-9]\d*))?)$"),
-    # non-sematic, e.g. 2023-05[-dev]
-    re.compile("^(?P<year>[1-9]{1}[0-9]{3})\.(?P<month>[0-9]{2})(-dev){0,1}$"),
-    # non-semantic, e.g. abcdef0
-    re.compile("^[a-f0-9]{7}$"),
-]
-
 ##### Other functions #####
 
 def is_primitive(var: any) -> bool:
     # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!")
     return type(var) in {int, str, float, bool} or var is None
 
-def remove_version(software: str) -> str:
-    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
-    if not "." in software and " " not in software:
-        print(f"WARNING: software='{software}' does not contain a version number.")
-        return software
-
-    temp = software
-    if ";" in software:
-        temp = software.split(";")[0]
-    elif "," in software:
-        temp = software.split(",")[0]
-    elif " - " in software:
-        temp = software.split(" - ")[0]
-
-    # DEBUG: print(f"DEBUG: software='{software}'")
-    version = None
-    if " " in software:
-        version = temp.split(" ")[-1]
-    elif "/" in software:
-        version = temp.split("/")[-1]
-    elif "-" in software:
-        version = temp.split("-")[-1]
-    else:
-        # DEBUG: print(f"DEBUG: Was not able to find common seperator, returning untouched software='{software}'")
-        return software
-
-    match = None
-    # DEBUG: print(f"DEBUG: Checking {len(patterns)} patterns ...")
-    for pattern in patterns:
-        # Run match()
-        match = pattern.match(version)
-
-        # DEBUG: print(f"DEBUG: match[]='{type(match)}'")
-        if isinstance(match, re.Match):
-            # DEBUG: print(f"DEBUG: version='{version}' is matching pattern='{pattern}'")
-            break
-
-    # DEBUG: print(f"DEBUG: version[{type(version)}]='{version}',match='{match}'")
-    if not isinstance(match, re.Match):
-        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
-        return software
-
-    # DEBUG: print(f"DEBUG: Found valid version number: '{version}', removing it ...")
-    end = len(temp) - len(version) - 1
-
-    # DEBUG: print(f"DEBUG: end[{type(end)}]={end}")
-    software = temp[0:end].strip()
-    if " version" in software:
-        # DEBUG: print(f"DEBUG: software='{software}' contains word ' version'")
-        software = strip_until(software, " version")
-
-    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
-    return software
-
-def strip_powered_by(software: str) -> str:
-    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
-    if not isinstance(software, str):
-        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
-    elif software == "":
-        raise ValueError("Parameter 'software' is empty")
-    elif "powered by" not in software:
-        print(f"WARNING: Cannot find 'powered by' in software='{software}'!")
-        return software
-
-    start = software.find("powered by ")
-    # DEBUG: print(f"DEBUG: start[{type(start)}]='{start}'")
-
-    software = software[start + 11:].strip()
-    # DEBUG: print(f"DEBUG: software='{software}'")
-
-    software = strip_until(software, " - ")
-
-    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
-    return software
-
-def strip_hosted_on(software: str) -> str:
-    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
-    if not isinstance(software, str):
-        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
-    elif software == "":
-        raise ValueError("Parameter 'software' is empty")
-    elif "hosted on" not in software:
-        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
-        return software
-
-    end = software.find("hosted on ")
-    # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
-
-    software = software[0, end].strip()
-    # DEBUG: print(f"DEBUG: software='{software}'")
-
-    software = strip_until(software, " - ")
-
-    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
-    return software
-
-def strip_until(software: str, until: str) -> str:
-    # DEBUG: print(f"DEBUG: software='{software}',until='{until}' - CALLED!")
-    if not isinstance(software, str):
-        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
-    elif software == "":
-        raise ValueError("Parameter 'software' is empty")
-    elif not isinstance(until, str):
-        raise ValueError(f"Parameter until[]='{type(until)}' is not 'str'")
-    elif until == "":
-        raise ValueError("Parameter 'until' is empty")
-    elif not until in software:
-        print(f"WARNING: Cannot find '{until}' in '{software}'!")
-        return software
-
-    # Next, strip until part
-    end = software.find(until)
-
-    # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
-    if end > 0:
-        software = software[0:end].strip()
-
-    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
-    return software
-
 def get_hash(domain: str) -> str:
     if not isinstance(domain, str):
         raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
diff --git a/fba/federation.py b/fba/federation.py
index da7277c..985d71a 100644
--- a/fba/federation.py
+++ b/fba/federation.py
@@ -19,11 +19,11 @@ import validators
 from fba import blacklist
 from fba import config
 from fba import csrf
-from fba import fba
 from fba import instances
 from fba import network
 
 from fba.helpers import tidyup
+from fba.helpers import version
 
 from fba.networks import lemmy
 from fba.networks import misskey
@@ -354,21 +354,21 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
         software = None
     elif isinstance(software, str) and ("." in software or " " in software):
         # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
-        software = fba.remove_version(software)
+        software = version.remove(software)
 
     # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
     if isinstance(software, str) and " powered by " in software:
         # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
-        software = fba.remove_version(fba.strip_powered_by(software))
+        software = version.remove(version.strip_powered_by(software))
     elif isinstance(software, str) and " hosted on " in software:
         # DEBUG: print(f"DEBUG: software='{software}' has 'hosted on' in it")
-        software = fba.remove_version(fba.strip_hosted_on(software))
+        software = version.remove(version.strip_hosted_on(software))
     elif isinstance(software, str) and " by " in software:
         # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
-        software = fba.strip_until(software, " by ")
+        software = version.strip_until(software, " by ")
     elif isinstance(software, str) and " see " in software:
         # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
-        software = fba.strip_until(software, " see ")
+        software = version.strip_until(software, " see ")
 
     # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
     return software
@@ -430,13 +430,13 @@ def determine_software(domain: str, path: str = None) -> str:
         software = tidyup.domain(software.split("|")[0])
     elif "powered by" in software:
         # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
-        software = fba.strip_powered_by(software)
+        software = version.strip_powered_by(software)
     elif isinstance(software, str) and " by " in software:
         # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
-        software = fba.strip_until(software, " by ")
+        software = version.strip_until(software, " by ")
     elif isinstance(software, str) and " see " in software:
         # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
-        software = fba.strip_until(software, " see ")
+        software = version.strip_until(software, " see ")
 
     # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
     if software == "":
@@ -449,12 +449,12 @@ def determine_software(domain: str, path: str = None) -> str:
         software = fetch_generator_from_path(domain)
     elif len(str(software)) > 0 and ("." in software or " " in software):
         # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
-        software = fba.remove_version(software)
+        software = version.remove(software)
 
     # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
     if isinstance(software, str) and "powered by" in software:
         # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
-        software = fba.remove_version(fba.strip_powered_by(software))
+        software = version.remove(version.strip_powered_by(software))
 
     # DEBUG: print("DEBUG: Returning domain,software:", domain, software)
     return software
diff --git a/fba/helpers/__init__.py b/fba/helpers/__init__.py
index 9f8b143..e9ddf42 100644
--- a/fba/helpers/__init__.py
+++ b/fba/helpers/__init__.py
@@ -16,4 +16,5 @@
 __all__ = [
     'dicts',
     'tidyup',
+    'version',
 ]
diff --git a/fba/helpers/version.py b/fba/helpers/version.py
new file mode 100644
index 0000000..6e16661
--- /dev/null
+++ b/fba/helpers/version.py
@@ -0,0 +1,148 @@
+# Copyright (C) 2023 Free Software Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+import re
+
+# Pattern instance for version numbers
+patterns = [
+    # semantic version number (with optional v|V prefix)
+    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"),
+    # non-semantic, e.g. 1.2.3.4
+    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(\.(?P<subpatch>0|[1-9]\d*))?)$"),
+    # non-semantic, e.g. 2023.05[-dev]
+    re.compile(r"^(?P<year>[1-9]{1}[0-9]{3})\.(?P<month>[0-9]{2})(-dev){0,1}$"),
+    # non-semantic, e.g. abcdef0
+    re.compile("^[a-f0-9]{7}$"),
+]
+
+def remove(software: str) -> str:
+    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
+    if not "." in software and " " not in software:
+        print(f"WARNING: software='{software}' does not contain a version number.")
+        return software
+
+    temp = software
+    if ";" in software:
+        temp = software.split(";")[0]
+    elif "," in software:
+        temp = software.split(",")[0]
+    elif " - " in software:
+        temp = software.split(" - ")[0]
+
+    # DEBUG: print(f"DEBUG: software='{software}'")
+    version = None
+    if " " in software:
+        version = temp.split(" ")[-1]
+    elif "/" in software:
+        version = temp.split("/")[-1]
+    elif "-" in software:
+        version = temp.split("-")[-1]
+    else:
+        # DEBUG: print(f"DEBUG: Was not able to find common separator, returning untouched software='{software}'")
+        return software
+
+    match = None
+    # DEBUG: print(f"DEBUG: Checking {len(patterns)} patterns ...")
+    for pattern in patterns:
+        # Run match()
+        match = pattern.match(version)
+
+        # DEBUG: print(f"DEBUG: match[]='{type(match)}'")
+        if isinstance(match, re.Match):
+            # DEBUG: print(f"DEBUG: version='{version}' is matching pattern='{pattern}'")
+            break
+
+    # DEBUG: print(f"DEBUG: version[{type(version)}]='{version}',match='{match}'")
+    if not isinstance(match, re.Match):
+        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
+        return software
+
+    # DEBUG: print(f"DEBUG: Found valid version number: '{version}', removing it ...")
+    end = len(temp) - len(version) - 1
+
+    # DEBUG: print(f"DEBUG: end[{type(end)}]={end}")
+    software = temp[0:end].strip()
+    if " version" in software:
+        # DEBUG: print(f"DEBUG: software='{software}' contains word ' version'")
+        software = strip_until(software, " version")
+
+    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+    return software
+
+def strip_powered_by(software: str) -> str:
+    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
+    if not isinstance(software, str):
+        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
+    elif software == "":
+        raise ValueError("Parameter 'software' is empty")
+    elif "powered by" not in software:
+        print(f"WARNING: Cannot find 'powered by' in software='{software}'!")
+        return software
+
+    start = software.find("powered by ")
+    # DEBUG: print(f"DEBUG: start[{type(start)}]='{start}'")
+
+    software = software[start + 11:].strip()
+    # DEBUG: print(f"DEBUG: software='{software}'")
+
+    software = strip_until(software, " - ")
+
+    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+    return software
+
+def strip_hosted_on(software: str) -> str:
+    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
+    if not isinstance(software, str):
+        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
+    elif software == "":
+        raise ValueError("Parameter 'software' is empty")
+    elif "hosted on" not in software:
+        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
+        return software
+
+    end = software.find("hosted on ")
+    # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
+
+    software = software[0:end].strip()
+    # DEBUG: print(f"DEBUG: software='{software}'")
+
+    software = strip_until(software, " - ")
+
+    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+    return software
+
+def strip_until(software: str, until: str) -> str:
+    # DEBUG: print(f"DEBUG: software='{software}',until='{until}' - CALLED!")
+    if not isinstance(software, str):
+        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
+    elif software == "":
+        raise ValueError("Parameter 'software' is empty")
+    elif not isinstance(until, str):
+        raise ValueError(f"Parameter until[]='{type(until)}' is not 'str'")
+    elif until == "":
+        raise ValueError("Parameter 'until' is empty")
+    elif not until in software:
+        print(f"WARNING: Cannot find '{until}' in '{software}'!")
+        return software
+
+    # Next, strip until part
+    end = software.find(until)
+
+    # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
+    if end > 0:
+        software = software[0:end].strip()
+
+    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+    return software
diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py
index 186c8d4..36f8535 100644
--- a/fba/networks/pleroma.py
+++ b/fba/networks/pleroma.py
@@ -17,11 +17,8 @@
 import inspect
 import validators
 
-import bs4
-
 from fba import blacklist
 from fba import blocks
-from fba import config
 from fba import fba
 from fba import federation
 from fba import instances
-- 
2.39.5