From ee7a3406ee9c8a001a562e1f959454bab1f204b6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sun, 10 Dec 2023 09:04:23 +0100 Subject: [PATCH] Continued: - check validity of href URL before parsing it (controlled skip instead of uncontrolled raised exception) --- fba/http/nodeinfo.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fba/http/nodeinfo.py b/fba/http/nodeinfo.py index 5f78a44..979173e 100644 --- a/fba/http/nodeinfo.py +++ b/fba/http/nodeinfo.py @@ -14,6 +14,7 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. import logging +import validators from urllib.parse import urlparse @@ -191,11 +192,11 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: logger.debug("Marking domain='%s' as successfully handled ...", domain) instances.set_success(domain) - logger.debug("Found infos[links]()=%d record(s),", len(infos["links"])) + logger.debug("Checking %d nodeinfo ids ...", len(_nodeinfo_identifier)) for niid in _nodeinfo_identifier: data = dict() - logger.debug("Checking niid='%s' ...", niid) + logger.debug("Checking niid='%s' for infos[links]()=%d ...", niid, len(infos["links"])) for link in infos["links"]: logger.debug("link[%s]='%s'", type(link), link) if not isinstance(link, dict) or not "rel" in link: @@ -208,12 +209,17 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"]) continue elif link["href"] in [None, ""]: - logger.debug("link[href]='%s',link[rel]='%s' - SKIPPED!", link["href"], link["rel"]) + logger.debug("link[href]='%s' is empty, link[rel]='%s' - SKIPPED!", link["href"], link["rel"]) + continue + elif not validators.url(link["href"]): + logger.warning("link[href]='%s' is not a valid domain - SKIPPED!", link["href"]) continue # Default is that 'href' has a complete URL, but some hosts don't send that logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid) url = link["href"].lower() + + logger.debug("Parsing 
url='%s' ...", url) components = urlparse(url) logger.debug("components[%s]='%s'", type(components), components) -- 2.39.5