From fbe3f0b87f4b790cb210dbe0051826717690c051 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 24 Jul 2023 08:16:17 +0200 Subject: [PATCH] Continued: - added support for x-nodeinfo2 which can be directly fetched from /.well-known/ "directory" - also rewrote fetching well-known nodeinfo URLs in a more flexible way --- fba/http/federation.py | 175 ++++++++++++++++++++++++----------------- 1 file changed, 102 insertions(+), 73 deletions(-) diff --git a/fba/http/federation.py b/fba/http/federation.py index 21e2670..540f741 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -244,7 +244,7 @@ def fetch_nodeinfo(domain: str, path: str = None) -> dict: logger.debug("Fetching nodeinfo from domain='%s' ...", domain) nodeinfo = fetch_wellknown_nodeinfo(domain) - logger.debug("nodeinfo[%s](%d='%s'", type(nodeinfo), len(nodeinfo), nodeinfo) + logger.debug("nodeinfo[%s](%d)='%s'", type(nodeinfo), len(nodeinfo), nodeinfo) if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0: logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain) instances.set_last_nodeinfo(domain) @@ -346,88 +346,113 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: "exception" : exception, } + data = dict() + logger.debug("Fetching .well-known info for domain='%s'", domain) - data = network.get_json_api( - domain, - "/.well-known/nodeinfo", - headers, - (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")) - ) + for path in ["/.well-known/nodeinfo", "/.well-known/x-nodeinfo2"]: + logger.debug("Fetching path='%s' from domain='%s' ...", path, domain) + data = network.get_json_api( + domain, + path, + headers, + (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")) + ) + logger.debug("data[]='%s'", type(data)) + + if "error_message" not in data and "json" in data: + logger.debug("path='%s' returned valid json()=%d", path, len(data["json"])) + break
logger.debug("data[]='%s'", type(data)) - if "error_message" not in data: - nodeinfo = data["json"] + if "exception" in data: + logger.warning("domain='%s' returned exception '%s'", domain, str(data["exception"])) + raise data["exception"] + elif "error_message" in data: + logger.warning("domain='%s' returned error message: '%s'", domain, data["error_message"]) + return data + elif "json" not in data: + logger.warning("domain='%s' returned no 'json' key", domain) + return dict() + + nodeinfo = data["json"] + logger.debug("nodeinfo()=%d has been returned", len(nodeinfo)) + if "links" in nodeinfo: logger.debug("Marking domain='%s' as successfully handled ...", domain) instances.set_success(domain) - logger.debug("Found entries: nodeinfo()=%d,domain='%s'", len(nodeinfo), domain) - if "links" in nodeinfo: - logger.debug("Found nodeinfo[links]()=%d record(s),", len(nodeinfo["links"])) - for niid in nodeinfo_identifier: - data = dict() - - logger.debug("Checking niid='%s' ...", niid) - for link in nodeinfo["links"]: - logger.debug("link[%s]='%s'", type(link), link) - if not isinstance(link, dict) or not "rel" in link: - logger.debug("link[]='%s' is not of type 'dict' or no element 'rel' found - SKIPPED!", type(link)) - continue - elif link["rel"] != niid: - logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid) - continue - elif "href" not in link: - logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"]) - continue - elif link["href"] is None: - logger.debug("link[href] is None, link[rel]='%s' - SKIPPED!", link["rel"]) - continue - - # Default is that 'href' has a complete URL, but some hosts don't send that - logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid) - url = link["href"] + logger.debug("Found nodeinfo[links]()=%d record(s),", len(nodeinfo["links"])) + for niid in nodeinfo_identifier: + data = dict() + + logger.debug("Checking niid='%s' ...", niid) + for link in nodeinfo["links"]: + 
logger.debug("link[%s]='%s'", type(link), link) + if not isinstance(link, dict) or not "rel" in link: + logger.debug("link[]='%s' is not of type 'dict' or no element 'rel' found - SKIPPED!", type(link)) + continue + elif link["rel"] != niid: + logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid) + continue + elif "href" not in link: + logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"]) + continue + elif link["href"] is None: + logger.debug("link[href] is None, link[rel]='%s' - SKIPPED!", link["rel"]) + continue + + # Default is that 'href' has a complete URL, but some hosts don't send that + logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid) + url = link["href"] + components = urlparse(url) + + logger.debug("components[%s]='%s'", type(components), components) + if components.scheme == "" and components.netloc == "": + logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain) + url = f"https://{domain}{url}" components = urlparse(url) + elif components.netloc == "": + logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain) + url = f"{components.scheme}://{domain}{components.path}" + components = urlparse(url) + + logger.debug("components.netloc[]='%s'", type(components.netloc)) + if not utils.is_domain_wanted(components.netloc): + logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc) + continue - logger.debug("components[%s]='%s'", type(components), components) - if components.scheme == "" and components.netloc == "": - logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain) - url = f"https://{domain}{url}" - components = urlparse(url) - elif components.netloc == "": - logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain) - url = 
f"{components.scheme}://{domain}{components.path}" - components = urlparse(url) - - logger.debug("components.netloc[]='%s'", type(components.netloc)) - if not utils.is_domain_wanted(components.netloc): - logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc) - continue - - logger.debug("Fetching nodeinfo from url='%s' ...", url) - data = network.fetch_api_url( - url, - (config.get("connection_timeout"), config.get("read_timeout")) - ) - - logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data)) - if "error_message" not in data and "json" in data: - logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"]) - instances.set_detection_mode(domain, "AUTO_DISCOVERY") - instances.set_nodeinfo_url(domain, link["href"]) - - logger.debug("Marking domain='%s' as successfully handled ...", domain) - instances.set_success(domain) - break - else: - logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data)) - instances.set_last_error(domain, data) - - logger.debug("data()=%d", len(data)) + logger.debug("Fetching nodeinfo from url='%s' ...", url) + data = network.fetch_api_url( + url, + (config.get("connection_timeout"), config.get("read_timeout")) + ) + + logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data)) if "error_message" not in data and "json" in data: - logger.debug("Auto-discovery successful: domain='%s'", domain) + logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"]) + instances.set_detection_mode(domain, "AUTO_DISCOVERY") + instances.set_nodeinfo_url(domain, link["href"]) + + logger.debug("Marking domain='%s' as successfully handled ...", domain) + instances.set_success(domain) break - else: - logger.warning("nodeinfo does not contain 'links': domain='%s'", domain) + else: + logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data)) + 
instances.set_last_error(domain, data) + + logger.debug("data()=%d", len(data)) + if "error_message" not in data and "json" in data: + logger.debug("Auto-discovery successful: domain='%s'", domain) + break + elif "server" in nodeinfo: + logger.debug("Found nodeinfo[server][software]='%s'", nodeinfo["server"]["software"]) + instances.set_detection_mode(domain, "AUTO_DISCOVERY") + instances.set_nodeinfo_url(domain, f"https://{domain}/.well-known/x-nodeinfo2") + + logger.debug("Marking domain='%s' as successfully handled ...", domain) + instances.set_success(domain) + else: + logger.warning("nodeinfo does not contain 'links' or 'server': domain='%s'", domain) logger.debug("Returning data[]='%s' - EXIT!", type(data)) return data @@ -557,7 +582,7 @@ def determine_software(domain: str, path: str = None) -> str: logger.debug("domain='%s',path='%s',data[json] found ...", domain, path) data = data["json"] else: - logger.debug("JSON response from domain='%s' does not include [software][name], fetching / ...", domain) + logger.debug("Auto-detection for domain='%s' was failing, fetching / ...", domain) software = fetch_generator_from_path(domain) logger.debug("Generator for domain='%s' is: '%s'", domain, software) @@ -581,6 +606,10 @@ def determine_software(domain: str, path: str = None) -> str: logger.debug("Invoking fetch_generator_from_path(%s) ...", domain) software = fetch_generator_from_path(domain) logger.debug("Generator for domain='%s' is: '%s'", domain, software) + elif "server" in data and "software" in data["server"]: + logger.debug("Found data[server][software]='%s' for domain='%s'", data["server"]["software"].lower(), domain) + software = data["server"]["software"].lower() + logger.debug("Detected software for domain='%s' is: '%s'", domain, software) elif "software" not in data or "name" not in data["software"]: logger.debug("JSON response from domain='%s' does not include [software][name] - Resetting detection_mode,nodeinfo_url ...", domain) 
instances.set_detection_mode(domain, None) -- 2.39.5