From: Roland Häder Date: Sat, 17 Jun 2023 09:47:19 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=5d699e50168a56c86cd35c9962f51730053d9585;p=fba.git Continued: - in the end, a missing "t" caused a lot of 'sofware' to be None (NULL) now --- diff --git a/fba/federation.py b/fba/federation.py index eebbfd7..55a0b21 100644 --- a/fba/federation.py +++ b/fba/federation.py @@ -364,16 +364,20 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: site_name = doc.find("meta", {"property": "og:site_name"}) # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'") - if isinstance(generator, bs4.element.Tag): - # DEBUG: print("DEBUG: Found generator meta tag:", domain) + if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str): + print("DEBUG: Found generator meta tag:", domain) software = tidyup.domain(generator.get("content")) - print(f"INFO: domain='{domain}' is generated by '{software}'") - instances.set_detection_mode(domain, "GENERATOR") + # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'") + if software is not None and software != "": + print(f"INFO: domain='{domain}' is generated by '{software}'") + instances.set_detection_mode(domain, "GENERATOR") elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str): # DEBUG: print("DEBUG: Found property=og:site_name:", domain) - sofware = tidyup.domain(site_name.get("content")) - print(f"INFO: domain='{domain}' has og:site_name='{software}'") - instances.set_detection_mode(domain, "SITE_NAME") + software = tidyup.domain(site_name.get("content")) + # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'") + if software is not None and software != "": + print(f"INFO: domain='{domain}' has og:site_name='{software}'") + instances.set_detection_mode(domain, "SITE_NAME") # DEBUG: print(f"DEBUG: software[]='{type(software)}'") if isinstance(software, str) and software == "": diff --git a/fba/helpers/tidyup.py b/fba/helpers/tidyup.py index 32f3e9e..cdbfa40 100644 --- a/fba/helpers/tidyup.py +++ b/fba/helpers/tidyup.py @@ -33,22 +33,28 @@ def domain(string: str) -> str: # All lower-case and strip spaces out + last dot string = string.lower().strip().rstrip(".") + # DEBUG: print(f"DEBUG: string='{string}' - #1") # No port number string = re.sub("\:\d+$", "", string) + # DEBUG: print(f"DEBUG: string='{string}' - #2") # No protocol, sometimes without the slashes string = re.sub("^https?\:(\/*)", "", string) + # DEBUG: print(f"DEBUG: string='{string}' - #3") # No trailing slash string = re.sub("\/$", "", string) + # DEBUG: print(f"DEBUG: string='{string}' - #4") # No @ or : sign string = re.sub("^\@", "", string) string = string.split(":")[0] + # DEBUG: print(f"DEBUG: string='{string}' - #4") # No individual users in block lists string = re.sub("(.+)\@", "", string) + # DEBUG: print(f"DEBUG: string='{string}' - #5") if string.find("/profile/"): string = string.split("/profile/")[0] elif string.find("/users/"):