From: Roland Häder <roland@mxchange.org>
Date: Wed, 4 Jun 2025 10:07:38 +0000 (+0200)
Subject: Continued:
X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=refs%2Fheads%2Fmaster;p=fba.git

Continued:
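- federation.fetch_instances() must also encode its domain parameter to IDNA
  (with any "?..." suffix stripped) before the domain is registered or fetched
- renamed variables in federation.fetch_instances(): the IDNA-encoded domain is
  now 'instance', and the loop variable over peerlist is now 'peer'
- processing.csv_instance() now invokes instances.set_total_peers() instead of
  instances.set_total_instances()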
---

diff --git a/fba/helpers/processing.py b/fba/helpers/processing.py
index f21ff53..09f223c 100644
--- a/fba/helpers/processing.py
+++ b/fba/helpers/processing.py
@@ -339,8 +339,8 @@ def csv_instance(instance: str, url: str, command: str) -> None:
         logger.debug("Invoking commit() ...")
         database.connection.commit()
 
-    logger.debug("Invoking instances.set_total_instances(%s, domains()=%d) ...", instance, len(domains))
-    instances.set_total_instances(instance, domains)
+    logger.debug("Invoking instances.set_total_peers(%s, domains()=%d) ...", instance, len(domains))
+    instances.set_total_peers(instance, domains)
 
     logger.debug("Checking if instance='%s' has pending updates ...", instance)
     if instances.has_pending(instance):
diff --git a/fba/http/federation.py b/fba/http/federation.py
index 7c996e9..805adce 100644
--- a/fba/http/federation.py
+++ b/fba/http/federation.py
@@ -92,13 +92,17 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path:
     elif not isinstance(software, str):
         raise TypeError(f"Parameter software[]='{type(software)}' has not expected type 'str'")
 
+    logger.debug("domain='%s' - BEFORE!", domain)
+    instance = domain_helper.encode_idna(domain.split("?")[0])
+    logger.debug("instance='%s' - AFTER!", instance)
+
     # Increase depth
     _DEPTH = _DEPTH + 1
 
-    logger.debug("Checking if domain='%s' is registered ...", domain)
-    if not instances.is_registered(domain):
-        logger.debug("Adding new domain='%s',origin='%s',command='%s',path='%s',software='%s'", domain, origin, command, path, software)
-        instances.add(domain, origin, command, path, software)
+    logger.debug("Checking if instance='%s' is registered ...", instance)
+    if not instances.is_registered(instance):
+        logger.debug("Adding new instance='%s',origin='%s',command='%s',path='%s',software='%s'", instance, origin, command, path, software)
+        instances.add(instance, origin, command, path, software)
 
         logger.debug("software='%s'", software)
         if software is not None and software_helper.is_relay(software):
@@ -106,78 +110,78 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path:
             _DEPTH = _DEPTH - 1
             return
 
-    logger.debug("Updating last_instance_fetch for domain='%s' ...", domain)
-    instances.set_last_instance_fetch(domain)
+    logger.debug("Updating last_instance_fetch for instance='%s' ...", instance)
+    instances.set_last_instance_fetch(instance)
 
     peerlist = []
     logger.debug("software='%s'", software)
     if software is not None and not software_helper.is_relay(software):
         try:
-            logger.debug("Fetching instances for domain='%s',software='%s',origin='%s'", domain, software, origin)
-            peerlist = fetch_peers(domain, software, origin)
+            logger.debug("Fetching instances for instance='%s',software='%s',origin='%s'", instance, software, origin)
+            peerlist = fetch_peers(instance, software, origin)
         except network.exceptions as exception:
             _DEPTH = _DEPTH - 1
             raise exception
 
     logger.debug("peerlist[]='%s'", type(peerlist))
     if isinstance(peerlist, list):
-        logger.debug("Invoking instances.set_total_peerlist(%s,%d) ...", domain, len(peerlist))
-        instances.set_total_peers(domain, peerlist)
+        logger.debug("Invoking instances.set_total_peerlist(%s,%d) ...", instance, len(peerlist))
+        instances.set_total_peers(instance, peerlist)
 
-    logger.debug("Invoking cookies.clear(%s) ...", domain)
-    cookies.clear(domain)
+    logger.debug("Invoking cookies.clear(%s) ...", instance)
+    cookies.clear(instance)
 
     logger.debug("peerlist[]='%s'", type(peerlist))
     if peerlist is None:
-        logger.warning("Cannot fetch peers: domain='%s',software='%s'", domain, software)
-        if instances.has_pending(domain):
-            logger.debug("Flushing updates for domain='%s' ...", domain)
-            instances.update(domain)
+        logger.warning("Cannot fetch peers: instance='%s',software='%s'", instance, software)
+        if instances.has_pending(instance):
+            logger.debug("Flushing updates for instance='%s' ...", instance)
+            instances.update(instance)
 
         _DEPTH = _DEPTH - 1
         logger.debug("EXIT!")
         return
     elif len(peerlist) == 0:
-        logger.info("domain='%s' returned an empty peer list.", domain)
-        if instances.has_pending(domain):
-            logger.debug("Flushing updates for domain='%s' ...", domain)
-            instances.update(domain)
+        logger.info("instance='%s' returned an empty peer list.", instance)
+        if instances.has_pending(instance):
+            logger.debug("Flushing updates for instance='%s' ...", instance)
+            instances.update(instance)
 
         _DEPTH = _DEPTH - 1
-        logger.debug("domain='%s',software='%s' has an empty peer list returned - EXIT!", domain, software)
+        logger.debug("instance='%s',software='%s' has an empty peer list returned - EXIT!", instance, software)
         return
 
-    logger.info("Checking %d instance(s) from domain='%s',software='%s',depth=%d ...", len(peerlist), domain, software, _DEPTH)
-    for instance in peerlist:
-        logger.debug("instance[%s]='%s'", type(instance), instance)
-        if instance in [None, ""]:
-            logger.debug("instance[%s]='%s' is either None or empty - SKIPPED!", type(instance), instance)
+    logger.info("Checking %d instance(s) from instance='%s',software='%s',depth=%d ...", len(peerlist), instance, software, _DEPTH)
+    for peer in peerlist:
+        logger.debug("peer[%s]='%s'", type(peer), peer)
+        if peer in [None, ""]:
+            logger.debug("peer[%s]='%s' is either None or empty - SKIPPED!", type(peer), peer)
             continue
-        elif isinstance(instance, dict) and "url" in instance:
-            logger.debug("Found instance[url]='%s', extracting domain/host name ...", instance["url"])
-            if not validators.url(instance["url"]):
-                logger.warning("instance[url]='%s' is not a valid URL - SKIPPED!", instance["url"])
+        elif isinstance(peer, dict) and "url" in peer:
+            logger.debug("Found peer[url]='%s', extracting domain/host name ...", peer["url"])
+            if not validators.url(peer["url"]):
+                logger.warning("peer[url]='%s' is not a valid URL - SKIPPED!", peer["url"])
                 continue
 
-            components = urllib.parse.urlparse(instance["url"])
+            components = urllib.parse.urlparse(peer["url"])
             logger.debug("components[%s]()=%d", type(components), len(components))
 
-            instance = components.netloc.lower().split(":")[0]
-            logger.debug("instance='%s'", instance)
+            peer = components.netloc.lower().split(":")[0]
+            logger.debug("peer='%s'", peer)
 
-        logger.debug("instance='%s' - BEFORE!", instance)
-        instance = tidyup.domain(instance) if isinstance(instance, str) and instance != "" else None
-        logger.debug("instance='%s' - AFTER!", instance)
+        logger.debug("peer='%s' - BEFORE!", peer)
+        peer = tidyup.domain(peer) if isinstance(peer, str) and peer != "" else None
+        logger.debug("peer='%s' - AFTER!", peer)
 
-        if instance in [None, ""]:
-            logger.debug("instance[%s]='%s' is empty after tidyup.domain(), domain='%s'", type(instance), instance, domain)
+        if peer in [None, ""]:
+            logger.debug("peer[%s]='%s' is empty after tidyup.domain(), domain='%s'", type(peer), peer, domain)
             continue
-        elif ".." in instance:
-            logger.warning("instance='%s' contains double-dot, removing ...", instance)
-            instance = instance.replace("..", ".")
+        elif ".." in peer:
+            logger.warning("peer='%s' contains double-dot, removing ...", peer)
+            peer = peer.replace("..", ".")
 
-        probe = instance.split("/")[0]
-        logger.debug("instance='%s',probe='%s'", instance, probe)
+        probe = peer.split("/")[0]
+        logger.debug("peer='%s',probe='%s'", peer, probe)
         if not validators.domain(probe, rfc_2782=True):
             logger.warning("probe='%s' is not a valid domain - SKIPPED!", probe)
             continue
@@ -185,32 +189,32 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path:
             logger.debug("probe='%s' has an unwanted TLD - SKIPPED!", probe)
             continue
 
-        logger.debug("instance='%s' - BEFORE!", instance)
-        instance = domain_helper.encode_idna(instance.split("?")[0])
-        logger.debug("instance='%s' - AFTER!", instance)
+        logger.debug("peer='%s' - BEFORE!", peer)
+        peer = domain_helper.encode_idna(peer.split("?")[0])
+        logger.debug("peer='%s' - AFTER!", peer)
 
-        if not domain_helper.is_wanted(instance):
-            logger.debug("instance='%s' is not wanted - SKIPPED!", instance)
+        if not domain_helper.is_wanted(peer):
+            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
             continue
-        elif instance.find("/profile/") > 0 or instance.find("/users/") > 0 or (instances.is_registered(instance.split("/")[0]) and instance.find("/c/") > 0):
-            logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance)
+        elif peer.find("/profile/") > 0 or peer.find("/users/") > 0 or (instances.is_registered(peer.split("/")[0]) and peer.find("/c/") > 0):
+            logger.debug("peer='%s' is a link to a single user profile - SKIPPED!", peer)
             continue
-        elif instance.find("/tag/") > 0:
-            logger.debug("instance='%s' is a link to a tag - SKIPPED!", instance)
+        elif peer.find("/tag/") > 0:
+            logger.debug("peer='%s' is a link to a tag - SKIPPED!", peer)
             continue
-        elif not instances.is_registered(instance):
+        elif not instances.is_registered(peer):
             logger.debug("Checking if domain='%s' has pending updates ...", domain)
             if instances.has_pending(domain):
                 logger.debug("Flushing updates for domain='%s' ...", domain)
                 instances.update(domain)
 
-            logger.debug("instance='%s',origin='%s',_DEPTH=%d reached!", instance, origin, _DEPTH)
+            logger.debug("peer='%s',origin='%s',_DEPTH=%d reached!", peer, origin, _DEPTH)
             if _DEPTH <= _max_crawl_depth and len(peerlist) >= _min_peers_length:
-                logger.debug("Fetching instance='%s',origin='%s',command='%s',path='%s',_DEPTH=%d ...", instance, domain, command, path, _DEPTH)
-                fetch_instances(instance, domain, None, command, path)
+                logger.debug("Fetching peer='%s',origin='%s',command='%s',path='%s',_DEPTH=%d ...", peer, domain, command, path, _DEPTH)
+                fetch_instances(peer, domain, None, command, path)
             else:
-                logger.debug("Adding instance='%s',domain='%s',command='%s',_DEPTH=%d ...", instance, domain, command, _DEPTH)
-                instances.add(instance, domain, command)
+                logger.debug("Adding peer='%s',domain='%s',command='%s',_DEPTH=%d ...", peer, domain, command, _DEPTH)
+                instances.add(peer, domain, command)
 
     logger.debug("Checking if domain='%s' has pending updates ...", domain)
     if instances.has_pending(domain):