site_name = doc.find("meta", {"property": "og:site_name"})
# DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
- if isinstance(generator, bs4.element.Tag):
- # DEBUG: print("DEBUG: Found generator meta tag:", domain)
+ if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
+ print("DEBUG: Found generator meta tag:", domain)
software = tidyup.domain(generator.get("content"))
- print(f"INFO: domain='{domain}' is generated by '{software}'")
- instances.set_detection_mode(domain, "GENERATOR")
+ # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'")
+ if software is not None and software != "":
+ print(f"INFO: domain='{domain}' is generated by '{software}'")
+ instances.set_detection_mode(domain, "GENERATOR")
elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
# DEBUG: print("DEBUG: Found property=og:site_name:", domain)
- sofware = tidyup.domain(site_name.get("content"))
- print(f"INFO: domain='{domain}' has og:site_name='{software}'")
- instances.set_detection_mode(domain, "SITE_NAME")
+ software = tidyup.domain(site_name.get("content"))
+ # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'")
+ if software is not None and software != "":
+ print(f"INFO: domain='{domain}' has og:site_name='{software}'")
+ instances.set_detection_mode(domain, "SITE_NAME")
# DEBUG: print(f"DEBUG: software[]='{type(software)}'")
if isinstance(software, str) and software == "":
# All lower-case and strip spaces out + last dot
string = string.lower().strip().rstrip(".")
+ # DEBUG: print(f"DEBUG: string='{string}' - #1")
# No port number
string = re.sub("\:\d+$", "", string)
+ # DEBUG: print(f"DEBUG: string='{string}' - #2")
# No protocol, sometimes without the slashes
string = re.sub("^https?\:(\/*)", "", string)
+ # DEBUG: print(f"DEBUG: string='{string}' - #3")
# No trailing slash
string = re.sub("\/$", "", string)
+ # DEBUG: print(f"DEBUG: string='{string}' - #4")
# No @ or : sign
string = re.sub("^\@", "", string)
string = string.split(":")[0]
+ # DEBUG: print(f"DEBUG: string='{string}' - #5")
# No individual users in block lists
string = re.sub("(.+)\@", "", string)
+ # DEBUG: print(f"DEBUG: string='{string}' - #6")
if string.find("/profile/"):
string = string.split("/profile/")[0]
elif string.find("/users/"):