From d923c70809e7946c1b738ca549db2bdd0894b873 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Wed, 24 May 2023 18:23:05 +0200 Subject: [PATCH] Continued: - "guessing" didn't really work, let's take the content of the "generator" meta tag as the software type, some people even change that and have no /.well-known/ path enabled. Then they cannot federate anymore, at least not by auto-discovery --- fba.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/fba.py b/fba.py index 9427b14..27ee3ba 100644 --- a/fba.py +++ b/fba.py @@ -424,20 +424,30 @@ def determine_software(domain: str) -> str: update_last_error(domain, json["message"]) return None elif "software" not in json or "name" not in json["software"]: - print("WARNING: JSON response does not include [software][name], guessing ...") - found = 0 - for element in {"uri", "title", "short_description", "description", "email", "version", "urls", "stats", "thumbnail", "languages", "contact_account", "registrations", "approval_required"}: - # NOISY-DEBUG: print("DEBUG: element:", element) - if element in json: - found = found + 1 - - # NOISY-DEBUG: print("DEBUG: Found elements:", found) - if found == len(json): - # NOISY-DEBUG: print("DEBUG: Maybe is Mastodon:", domain) - return "mastodon" - - print(f"WARNING: Cannot guess software type: domain='{domain}',found={found},json()={len(json)}") - return None + # NOISY-DEBUG: print(f"DEBUG: JSON response from {domain} does not include [software][name], fetching / ...") + try: + res = reqto.get(f"https://{domain}/", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + + # NOISY-DEBUG: print("DEBUG: domain,res.ok,res.status_code:", domain, res.ok, res.status_code) + if res.ok and res.status_code < 300 and len(res.text) > 0: + # NOISY-DEBUG: print("DEBUG: Search for :", domain) + + doc = bs4.BeautifulSoup(res.text, "html.parser") + # NOISY-DEBUG: print("DEBUG: doc[]:", type(doc)) + + tag = doc.find("meta", 
{"name": "generator"}) + # NOISY-DEBUG: print(f"DEBUG: tag[{type(tag)}: {tag}") + if isinstance(tag, bs4.element.Tag): + # NOISY-DEBUG: print("DEBUG: Found generator meta tag:", domain) + software = tidyup(tag.get("content")) + + except BaseException as e: + print(f"WARNING: Cannot fetch / from '{domain}':", e) + update_last_error(domain, e) + pass + + # NOISY-DEBUG: print(f"DEBUG: Generator for domain='{domain}' is: {software}, EXIT!") + return software software = tidyup(json["software"]["name"]) -- 2.39.5