From c8effc80bb534acec55edd0705be4e5d3a964b32 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 4 Jul 2023 13:43:07 +0200 Subject: [PATCH] Continued: - also check og:platform - don't set None for detected software type, maybe the website is just down for maintenance --- fba/commands.py | 2 +- fba/http/federation.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index e6cfd2f..d1a09c0 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -1537,7 +1537,7 @@ def update_nodeinfo(args: argparse.Namespace) -> int: software = federation.determine_software(row["domain"]) logger.debug("Determined software='%s'", software) - if software != row["software"]: + if software != row["software"] and software is not None: logger.warning("Software type has changed from '%s' to '%s'!", row["software"], software) instances.set_software(row["domain"], software) diff --git a/fba/http/federation.py b/fba/http/federation.py index 4508bf7..763ea60 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -420,8 +420,9 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: logger.debug("doc[]='%s'", type(doc)) generator = doc.find("meta", {"name" : "generator"}) site_name = doc.find("meta", {"property": "og:site_name"}) + platform = doc.find("meta", {"property": "og:platform"}) - logger.debug("generator[]='%s',site_name[]='%s'", type(generator), type(site_name)) + logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s'", type(generator), type(site_name), type(platform)) if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str): logger.debug("Found generator meta tag: domain='%s'", domain) software = tidyup.domain(generator.get("content")) @@ -438,6 +439,14 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: if software is not None and software != "": logger.debug("domain='%s' has og:site_name='%s' - Setting 
detection_mode=SITE_NAME ...", domain, software) instances.set_detection_mode(domain, "SITE_NAME") + elif isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str): + logger.debug("Found property=og:platform, domain='%s'", domain) + software = tidyup.domain(platform.get("content")) + + logger.debug("software[%s]='%s'", type(software), software) + if software is not None and software != "": + logger.debug("domain='%s' has og:platform='%s' - Setting detection_mode=PLATFORM ...", domain, software) + instances.set_detection_mode(domain, "PLATFORM") elif domain != components.netloc: logger.warning("domain='%s' doesn't match components.netloc='%s', maybe redirect to other domain?", domain, components.netloc) message = f"Redirect from domain='{domain}' to components.netloc='{components.netloc}'" -- 2.39.5