git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Sat, 17 Jun 2023 09:47:19 +0000 (11:47 +0200)
committer Roland Häder <roland@mxchange.org>
Sat, 17 Jun 2023 09:47:19 +0000 (11:47 +0200)
- in the end, a missing "t" (the variable was misspelled 'sofware' instead of 'software') caused a lot of 'software' values to be None (NULL)

fba/federation.py
fba/helpers/tidyup.py

index eebbfd72d5a0ade9e98c947be34c59d8e7d74861..55a0b21ae30cb4aab5d10aa6db33dfbe6f8e9f22 100644 (file)
@@ -364,16 +364,20 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
         site_name = doc.find("meta", {"property": "og:site_name"})
 
         # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
-        if isinstance(generator, bs4.element.Tag):
-            # DEBUG: print("DEBUG: Found generator meta tag:", domain)
+        if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
+            print("DEBUG: Found generator meta tag:", domain)
             software = tidyup.domain(generator.get("content"))
-            print(f"INFO: domain='{domain}' is generated by '{software}'")
-            instances.set_detection_mode(domain, "GENERATOR")
+            # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'")
+            if software is not None and software != "":
+                print(f"INFO: domain='{domain}' is generated by '{software}'")
+                instances.set_detection_mode(domain, "GENERATOR")
         elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
             # DEBUG: print("DEBUG: Found property=og:site_name:", domain)
-            sofware = tidyup.domain(site_name.get("content"))
-            print(f"INFO: domain='{domain}' has og:site_name='{software}'")
-            instances.set_detection_mode(domain, "SITE_NAME")
+            software = tidyup.domain(site_name.get("content"))
+            # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'")
+            if software is not None and software != "":
+                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
+                instances.set_detection_mode(domain, "SITE_NAME")
 
     # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
     if isinstance(software, str) and software == "":
index 32f3e9e4f6c8818a0ad3be9d300db008d75fb12f..cdbfa409a6008f25e01be4ffaece92c6d9df7a34 100644 (file)
@@ -33,22 +33,28 @@ def domain(string: str) -> str:
 
     # All lower-case and strip spaces out + last dot
     string = string.lower().strip().rstrip(".")
+    # DEBUG: print(f"DEBUG: string='{string}' - #1")
 
     # No port number
     string = re.sub("\:\d+$", "", string)
+    # DEBUG: print(f"DEBUG: string='{string}' - #2")
 
     # No protocol, sometimes without the slashes
     string = re.sub("^https?\:(\/*)", "", string)
+    # DEBUG: print(f"DEBUG: string='{string}' - #3")
 
     # No trailing slash
     string = re.sub("\/$", "", string)
+    # DEBUG: print(f"DEBUG: string='{string}' - #4")
 
     # No @ or : sign
     string = re.sub("^\@", "", string)
     string = string.split(":")[0]
+    # DEBUG: print(f"DEBUG: string='{string}' - #4")
 
     # No individual users in block lists
     string = re.sub("(.+)\@", "", string)
+    # DEBUG: print(f"DEBUG: string='{string}' - #5")
     if string.find("/profile/"):
         string = string.split("/profile/")[0]
     elif string.find("/users/"):