From c25d369f3fb535dc38cd63f7d1edf5ac2d7a6f7d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 3 Jul 2023 23:30:15 +0200 Subject: [PATCH] Continued: - rewrote a bit to get fewer nested blocks - "href" might not be set while "rel" is set ... too many broken /.well-known/nodeinfo replies! --- fba/http/federation.py | 85 +++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/fba/http/federation.py b/fba/http/federation.py index 07cbe0a..4508bf7 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -334,45 +334,52 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: for link in nodeinfo["links"]: logger.debug("link[%s]='%s'", type(link), link) if not isinstance(link, dict) or not "rel" in link: - logger.warning("link[]='%s' is not 'dict' or no element 'rel' found", type(link)) - elif link["rel"] == niid: - # Default is that 'href' has a complete URL, but some hosts don't send that - logger.debug("link[href]='%s' matches niid='%s'", link["href"], niid) - url = link["href"] - components = urlparse(link["href"]) - - logger.debug("components[%s]='%s'", type(components), components) - if components.scheme == "" and components.netloc == "": - logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain) - url = f"https://{domain}{url}" - components = urlparse(url) - elif components.netloc == "": - logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain) - url = f"{components.scheme}://{domain}{components.path}" - components = urlparse(url) - - if not utils.is_domain_wanted(components.netloc): - logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc) - continue - - logger.debug("Fetching nodeinfo from url='%s' ...", url) - data = network.fetch_api_url( - url, - (config.get("connection_timeout"), config.get("read_timeout")) - ) - - 
logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data)) - if "error_message" not in data and "json" in data: - logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"]) - instances.set_detection_mode(domain, "AUTO_DISCOVERY") - instances.set_nodeinfo_url(domain, link["href"]) - - logger.debug("Marking domain='%s' as successfully handled ...", domain) - instances.set_success(domain) - break - else: - logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data)) - instances.set_last_error(domain, data) + logger.debug("link[]='%s' is not 'dict' or no element 'rel' found - SKIPPED!", type(link)) + continue + elif link["rel"] != niid: + logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid) + continue + elif "href" not in link: + logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"]) + continue + + # Default is that 'href' has a complete URL, but some hosts don't send that + logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid) + url = link["href"] + components = urlparse(link["href"]) + + logger.debug("components[%s]='%s'", type(components), components) + if components.scheme == "" and components.netloc == "": + logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain) + url = f"https://{domain}{url}" + components = urlparse(url) + elif components.netloc == "": + logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain) + url = f"{components.scheme}://{domain}{components.path}" + components = urlparse(url) + + if not utils.is_domain_wanted(components.netloc): + logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc) + continue + + logger.debug("Fetching nodeinfo from url='%s' ...", url) + data = network.fetch_api_url( + url, + (config.get("connection_timeout"), 
config.get("read_timeout")) + ) + + logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data)) + if "error_message" not in data and "json" in data: + logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"]) + instances.set_detection_mode(domain, "AUTO_DISCOVERY") + instances.set_nodeinfo_url(domain, link["href"]) + + logger.debug("Marking domain='%s' as successfully handled ...", domain) + instances.set_success(domain) + break + else: + logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data)) + instances.set_last_error(domain, data) logger.debug("data()=%d", len(data)) if "error_message" not in data and "json" in data: -- 2.39.5