From bcb74a15969c5e29681466a379a02a7084077f30 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Thu, 24 Aug 2023 20:35:55 +0200 Subject: [PATCH] Continued: - renamed fetch_nodeinfo() to fetch() as it is already part of module nodeinfo - added 3rd optional parameter to it, fetching of /.well-known/* isn't then required anymore and saves another request - also the wanted URL can be directly used --- fba/helpers/blacklist.py | 1 + fba/http/federation.py | 12 +++++------ fba/http/nodeinfo.py | 46 +++++++++++++++++++++------------------- fba/networks/pleroma.py | 3 ++- 4 files changed, 33 insertions(+), 29 deletions(-) diff --git a/fba/helpers/blacklist.py b/fba/helpers/blacklist.py index 2a65fbc..bbd8d03 100644 --- a/fba/helpers/blacklist.py +++ b/fba/helpers/blacklist.py @@ -61,6 +61,7 @@ def is_blacklisted(domain: str) -> bool: if blocked in domain: logger.debug("domain='%s' is blacklisted.", domain) blacklisted = True + break logger.debug("blacklisted='%s' - EXIT!", blacklisted) return blacklisted diff --git a/fba/http/federation.py b/fba/http/federation.py index e726ae5..ba6ac67 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -354,19 +354,19 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: logger.debug("software='%s' - EXIT!", software) return software -def determine_software(domain: str, path: str = None) -> str: - logger.debug("domain='%s',path='%s' - CALLED!", domain, path) +def determine_software(domain: str, path: str = None, nodeinfo_url: str = None) -> str: + logger.debug("domain='%s',path='%s',nodeinfo_url='%s' - CALLED!", domain, path, nodeinfo_url) domain_helper.raise_on(domain) if not isinstance(path, str) and path is not None: raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'") + elif not isinstance(nodeinfo_url, str) and nodeinfo_url is not None: + raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'") - logger.debug("Determining software for domain='%s',path='%s'", domain, path) + logger.debug("Fetching nodeinfo from domain='%s',path='%s',nodeinfo_url='%s' ...", domain, path, nodeinfo_url) + data = nodeinfo.fetch(domain, path, nodeinfo_url) software = None - logger.debug("Fetching nodeinfo from domain='%s' ...", domain) - data = nodeinfo.fetch_nodeinfo(domain, path) - logger.debug("data[%s]='%s'", type(data), data) if "exception" in data: # Continue raising it diff --git a/fba/http/nodeinfo.py b/fba/http/nodeinfo.py index ec78862..724c875 100644 --- a/fba/http/nodeinfo.py +++ b/fba/http/nodeinfo.py @@ -14,6 +14,7 @@ # along with this program. If not, see . import logging +import validators from urllib.parse import urlparse @@ -31,26 +32,33 @@ _DEPTH = 0 logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -def fetch_nodeinfo(domain: str, path: str = None) -> dict: - logger.debug("domain='%s',path='%s' - CALLED!", domain, path) +def fetch(domain: str, path: str = None, nodeinfo_url: str = None) -> dict: + logger.debug("domain='%s',path='%s',nodeinfo_url='%s' - CALLED!", domain, path, nodeinfo_url) domain_helper.raise_on(domain) if not isinstance(path, str) and path is not None: raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'") + elif not isinstance(nodeinfo_url, str) and nodeinfo_url is not None: + raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'") - logger.debug("Fetching nodeinfo from domain='%s' ...", domain) - data = fetch_wellknown_nodeinfo(domain) + logger.debug("nodeinfo_url='%s'", nodeinfo_url) + is_url = nodeinfo_url is not None and validators.url(nodeinfo_url) - logger.debug("data[%s](%d)='%s'", type(data), len(data), data) - if "exception" in data: - logger.warning("Exception returned: '%s', raising again ...", type(data["exception"])) - raise data["exception"] - elif "error_message" not in data and "json" in data and len(data["json"]) > 0: - logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain) - instances.set_last_nodeinfo(domain) + logger.debug("is_url='%s'", is_url) + if not is_url: + logger.debug("Fetching well-known nodeinfo from domain='%s' ...", domain) + data = fetch_wellknown_nodeinfo(domain) - logger.debug("Found data[json]()=%d - EXIT!", len(data['json'])) - return data + logger.debug("data[%s](%d)='%s'", type(data), len(data), data) + if "exception" in data: + logger.warning("Exception returned: '%s', raising again ...", type(data["exception"])) + raise data["exception"] + elif "error_message" not in data and "json" in data and len(data["json"]) > 0: + logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain) + instances.set_last_nodeinfo(domain) + + logger.debug("Found data[json]()=%d - EXIT!", len(data['json'])) + return data # No CSRF by default, you don't have to add network.api_headers by yourself here headers = tuple() @@ -80,17 +88,11 @@ def fetch_nodeinfo(domain: str, path: str = None) -> dict: for request in request_paths: logger.debug("request='%s'", request) - http_url = f"http://{domain}{str(path)}" - https_url = f"https://{domain}{str(path)}" + http_url = f"http://{domain}{str(path) if path is not None else '/'}" + https_url = f"https://{domain}{str(path) if path is not None else '/'}" logger.debug("path[%s]='%s',request='%s',http_url='%s',https_url='%s'", type(path), path, request, http_url, https_url) - if path is None or path in [request, http_url, https_url]: - logger.debug("path='%s',http_url='%s',https_url='%s'", path, http_url, https_url) - if path in [http_url, https_url]: - logger.debug("domain='%s',path='%s' has protocol in path, splitting ...", domain, path) - components = urlparse(path) - path = components.path - + if (path is None and nodeinfo_url is None) or path in [request, http_url, https_url] or (is_url and nodeinfo_url.endswith(request)): logger.debug("Fetching request='%s' from domain='%s' ...", request, domain) data = network.get_json_api( domain, diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index 8352915..bd0ae04 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -63,11 +63,12 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list: rows = None try: logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url) - rows = nodeinfo.fetch_nodeinfo(domain, nodeinfo_url) + rows = nodeinfo.fetch(domain, nodeinfo_url=nodeinfo_url) if "error_message" in rows: logger.warning("Error message '%s' during fetching nodeinfo for domain='%s',nodeinfo_url='%s'", rows["error_message"], domain, nodeinfo_url) instances.set_last_error(domain, rows) + instances.update_data(domain) logger.debug("Returning empty list ... - EXIT!") return list() -- 2.39.5