Continued:

author Roland Häder <roland@mxchange.org>
Sun, 10 Dec 2023 08:04:23 +0000 (09:04 +0100)
committer Roland Häder <roland@mxchange.org>
Sun, 10 Dec 2023 08:04:23 +0000 (09:04 +0100)
diff --git a/fba/http/nodeinfo.py b/fba/http/nodeinfo.py

index 5f78a44050e80e68d74586db23147f0b43119850..979173eba078b9874a5041bbc644b435f65ea175 100644 (file)
--- a/fba/http/nodeinfo.py
+++ b/fba/http/nodeinfo.py
@@ -14,6 +14,7 @@
  # along with this program.  If not, see <https://www.gnu.org/licenses/>.
  
  import logging
+import validators
  
  from urllib.parse import urlparse
  
@@ -191,11 +192,11 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
          logger.debug("Marking domain='%s' as successfully handled ...", domain)
          instances.set_success(domain)
  
-        logger.debug("Found infos[links]()=%d record(s),", len(infos["links"]))
+        logger.debug("Checking %d nodeinfo ids ...", len(_nodeinfo_identifier))
          for niid in _nodeinfo_identifier:
              data = dict()
  
-            logger.debug("Checking niid='%s' ...", niid)
+            logger.debug("Checking niid='%s' for infos[links]()=%d ...", niid, len(infos["links"]))
              for link in infos["links"]:
                  logger.debug("link[%s]='%s'", type(link), link)
                  if not isinstance(link, dict) or not "rel" in link:
@@ -208,12 +209,17 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
                      logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"])
                      continue
                  elif link["href"] in [None, ""]:
-                    logger.debug("link[href]='%s',link[rel]='%s' - SKIPPED!", link["href"], link["rel"])
+                    logger.debug("link[href]='%s' is empty, link[rel]='%s' - SKIPPED!", link["href"], link["rel"])
+                    continue
+                elif not validators.url(link["href"]):
+                    logger.warning("link[href]='%s' is not a valid domain - SKIPPED!", link["href"])
                      continue
  
                  # Default is that 'href' has a complete URL, but some hosts don't send that
                  logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid)
                  url = link["href"].lower()
+
+                logger.debug("Parsing url='%s' ...", url)
                  components = urlparse(url)
  
                  logger.debug("components[%s]='%s'", type(components), components)
author	Roland Häder <roland@mxchange.org>
	Sun, 10 Dec 2023 08:04:23 +0000 (09:04 +0100)
committer	Roland Häder <roland@mxchange.org>
	Sun, 10 Dec 2023 08:04:23 +0000 (09:04 +0100)