git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Sun, 10 Dec 2023 08:04:23 +0000 (09:04 +0100)
committer Roland Häder <roland@mxchange.org>
Sun, 10 Dec 2023 08:04:23 +0000 (09:04 +0100)
- check validity of href URL before parsing it (controlled skip instead of
  uncontrolled raised exception)

fba/http/nodeinfo.py

index 5f78a44050e80e68d74586db23147f0b43119850..979173eba078b9874a5041bbc644b435f65ea175 100644 (file)
@@ -14,6 +14,7 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 import logging
+import validators
 
 from urllib.parse import urlparse
 
@@ -191,11 +192,11 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
         logger.debug("Marking domain='%s' as successfully handled ...", domain)
         instances.set_success(domain)
 
-        logger.debug("Found infos[links]()=%d record(s),", len(infos["links"]))
+        logger.debug("Checking %d nodeinfo ids ...", len(_nodeinfo_identifier))
         for niid in _nodeinfo_identifier:
             data = dict()
 
-            logger.debug("Checking niid='%s' ...", niid)
+            logger.debug("Checking niid='%s' for infos[links]()=%d ...", niid, len(infos["links"]))
             for link in infos["links"]:
                 logger.debug("link[%s]='%s'", type(link), link)
                 if not isinstance(link, dict) or not "rel" in link:
@@ -208,12 +209,17 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
                     logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"])
                     continue
                 elif link["href"] in [None, ""]:
-                    logger.debug("link[href]='%s',link[rel]='%s' - SKIPPED!", link["href"], link["rel"])
+                    logger.debug("link[href]='%s' is empty, link[rel]='%s' - SKIPPED!", link["href"], link["rel"])
+                    continue
+                elif not validators.url(link["href"]):
+                    logger.warning("link[href]='%s' is not a valid domain - SKIPPED!", link["href"])
                     continue
 
                 # Default is that 'href' has a complete URL, but some hosts don't send that
                 logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid)
                 url = link["href"].lower()
+
+                logger.debug("Parsing url='%s' ...", url)
                 components = urlparse(url)
 
                 logger.debug("components[%s]='%s'", type(components), components)