From: Roland Häder <roland@mxchange.org> Date: Wed, 4 Jun 2025 10:07:38 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=refs%2Fheads%2Fmaster;p=fba.git Continued: - federation.fetch_instances() must encode domain to IDNA, too - renamed variables --- diff --git a/fba/helpers/processing.py b/fba/helpers/processing.py index f21ff53..09f223c 100644 --- a/fba/helpers/processing.py +++ b/fba/helpers/processing.py @@ -339,8 +339,8 @@ def csv_instance(instance: str, url: str, command: str) -> None: logger.debug("Invoking commit() ...") database.connection.commit() - logger.debug("Invoking instances.set_total_instances(%s, domains()=%d) ...", instance, len(domains)) - instances.set_total_instances(instance, domains) + logger.debug("Invoking instances.set_total_peers(%s, domains()=%d) ...", instance, len(domains)) + instances.set_total_peers(instance, domains) logger.debug("Checking if instance='%s' has pending updates ...", instance) if instances.has_pending(instance): diff --git a/fba/http/federation.py b/fba/http/federation.py index 7c996e9..805adce 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -92,13 +92,17 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: elif not isinstance(software, str): raise TypeError(f"Parameter software[]='{type(software)}' has not expected type 'str'") + logger.debug("domain='%s' - BEFORE!", domain) + instance = domain_helper.encode_idna(domain.split("?")[0]) + logger.debug("instance='%s' - AFTER!", instance) + # Increase depth _DEPTH = _DEPTH + 1 - logger.debug("Checking if domain='%s' is registered ...", domain) - if not instances.is_registered(domain): - logger.debug("Adding new domain='%s',origin='%s',command='%s',path='%s',software='%s'", domain, origin, command, path, software) - instances.add(domain, origin, command, path, software) + logger.debug("Checking if instance='%s' is registered ...", instance) + if not instances.is_registered(instance): + logger.debug("Adding new instance='%s',origin='%s',command='%s',path='%s',software='%s'", instance, origin, command, path, software) + instances.add(instance, origin, command, path, software) logger.debug("software='%s'", software) if software is not None and software_helper.is_relay(software): @@ -106,78 +110,78 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: _DEPTH = _DEPTH - 1 return - logger.debug("Updating last_instance_fetch for domain='%s' ...", domain) - instances.set_last_instance_fetch(domain) + logger.debug("Updating last_instance_fetch for instance='%s' ...", instance) + instances.set_last_instance_fetch(instance) peerlist = [] logger.debug("software='%s'", software) if software is not None and not software_helper.is_relay(software): try: - logger.debug("Fetching instances for domain='%s',software='%s',origin='%s'", domain, software, origin) - peerlist = fetch_peers(domain, software, origin) + logger.debug("Fetching instances for instance='%s',software='%s',origin='%s'", instance, software, origin) + peerlist = fetch_peers(instance, software, origin) except network.exceptions as exception: _DEPTH = _DEPTH - 1 raise exception logger.debug("peerlist[]='%s'", type(peerlist)) if isinstance(peerlist, list): - logger.debug("Invoking instances.set_total_peerlist(%s,%d) ...", domain, len(peerlist)) - instances.set_total_peers(domain, peerlist) + logger.debug("Invoking instances.set_total_peerlist(%s,%d) ...", instance, len(peerlist)) + instances.set_total_peers(instance, peerlist) - logger.debug("Invoking cookies.clear(%s) ...", domain) - cookies.clear(domain) + logger.debug("Invoking cookies.clear(%s) ...", instance) + cookies.clear(instance) logger.debug("peerlist[]='%s'", type(peerlist)) if peerlist is None: - logger.warning("Cannot fetch peers: domain='%s',software='%s'", domain, software) - if instances.has_pending(domain): - logger.debug("Flushing updates for domain='%s' ...", domain) - instances.update(domain) + logger.warning("Cannot fetch peers: instance='%s',software='%s'", instance, software) + if instances.has_pending(instance): + logger.debug("Flushing updates for instance='%s' ...", instance) + instances.update(instance) _DEPTH = _DEPTH - 1 logger.debug("EXIT!") return elif len(peerlist) == 0: - logger.info("domain='%s' returned an empty peer list.", domain) - if instances.has_pending(domain): - logger.debug("Flushing updates for domain='%s' ...", domain) - instances.update(domain) + logger.info("instance='%s' returned an empty peer list.", instance) + if instances.has_pending(instance): + logger.debug("Flushing updates for instance='%s' ...", instance) + instances.update(instance) _DEPTH = _DEPTH - 1 - logger.debug("domain='%s',software='%s' has an empty peer list returned - EXIT!", domain, software) + logger.debug("instance='%s',software='%s' has an empty peer list returned - EXIT!", instance, software) return - logger.info("Checking %d instance(s) from domain='%s',software='%s',depth=%d ...", len(peerlist), domain, software, _DEPTH) - for instance in peerlist: - logger.debug("instance[%s]='%s'", type(instance), instance) - if instance in [None, ""]: - logger.debug("instance[%s]='%s' is either None or empty - SKIPPED!", type(instance), instance) + logger.info("Checking %d instance(s) from instance='%s',software='%s',depth=%d ...", len(peerlist), instance, software, _DEPTH) + for peer in peerlist: + logger.debug("peer[%s]='%s'", type(peer), peer) + if peer in [None, ""]: + logger.debug("peer[%s]='%s' is either None or empty - SKIPPED!", type(peer), peer) continue - elif isinstance(instance, dict) and "url" in instance: - logger.debug("Found instance[url]='%s', extracting domain/host name ...", instance["url"]) - if not validators.url(instance["url"]): - logger.warning("instance[url]='%s' is not a valid URL - SKIPPED!", instance["url"]) + elif isinstance(peer, dict) and "url" in peer: + logger.debug("Found peer[url]='%s', extracting domain/host name ...", peer["url"]) + if not validators.url(peer["url"]): + logger.warning("peer[url]='%s' is not a valid URL - SKIPPED!", peer["url"]) continue - components = urllib.parse.urlparse(instance["url"]) + components = urllib.parse.urlparse(peer["url"]) logger.debug("components[%s]()=%d", type(components), len(components)) - instance = components.netloc.lower().split(":")[0] - logger.debug("instance='%s'", instance) + peer = components.netloc.lower().split(":")[0] + logger.debug("peer='%s'", peer) - logger.debug("instance='%s' - BEFORE!", instance) - instance = tidyup.domain(instance) if isinstance(instance, str) and instance != "" else None - logger.debug("instance='%s' - AFTER!", instance) + logger.debug("peer='%s' - BEFORE!", peer) + peer = tidyup.domain(peer) if isinstance(peer, str) and peer != "" else None + logger.debug("peer='%s' - AFTER!", peer) - if instance in [None, ""]: - logger.debug("instance[%s]='%s' is empty after tidyup.domain(), domain='%s'", type(instance), instance, domain) + if peer in [None, ""]: + logger.debug("peer[%s]='%s' is empty after tidyup.domain(), domain='%s'", type(peer), peer, domain) continue - elif ".." in instance: - logger.warning("instance='%s' contains double-dot, removing ...", instance) - instance = instance.replace("..", ".") + elif ".." in peer: + logger.warning("peer='%s' contains double-dot, removing ...", peer) + peer = peer.replace("..", ".") - probe = instance.split("/")[0] - logger.debug("instance='%s',probe='%s'", instance, probe) + probe = peer.split("/")[0] + logger.debug("peer='%s',probe='%s'", peer, probe) if not validators.domain(probe, rfc_2782=True): logger.warning("probe='%s' is not a valid domain - SKIPPED!", probe) continue @@ -185,32 +189,32 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: logger.debug("probe='%s' has an unwanted TLD - SKIPPED!", probe) continue - logger.debug("instance='%s' - BEFORE!", instance) - instance = domain_helper.encode_idna(instance.split("?")[0]) - logger.debug("instance='%s' - AFTER!", instance) + logger.debug("peer='%s' - BEFORE!", peer) + peer = domain_helper.encode_idna(peer.split("?")[0]) + logger.debug("peer='%s' - AFTER!", peer) - if not domain_helper.is_wanted(instance): - logger.debug("instance='%s' is not wanted - SKIPPED!", instance) + if not domain_helper.is_wanted(peer): + logger.debug("peer='%s' is not wanted - SKIPPED!", peer) continue - elif instance.find("/profile/") > 0 or instance.find("/users/") > 0 or (instances.is_registered(instance.split("/")[0]) and instance.find("/c/") > 0): - logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance) + elif peer.find("/profile/") > 0 or peer.find("/users/") > 0 or (instances.is_registered(peer.split("/")[0]) and peer.find("/c/") > 0): + logger.debug("peer='%s' is a link to a single user profile - SKIPPED!", peer) continue - elif instance.find("/tag/") > 0: - logger.debug("instance='%s' is a link to a tag - SKIPPED!", instance) + elif peer.find("/tag/") > 0: + logger.debug("peer='%s' is a link to a tag - SKIPPED!", peer) continue - elif not instances.is_registered(instance): + elif not instances.is_registered(peer): logger.debug("Checking if domain='%s' has pending updates ...", domain) if instances.has_pending(domain): logger.debug("Flushing updates for domain='%s' ...", domain) instances.update(domain) - logger.debug("instance='%s',origin='%s',_DEPTH=%d reached!", instance, origin, _DEPTH) + logger.debug("peer='%s',origin='%s',_DEPTH=%d reached!", peer, origin, _DEPTH) if _DEPTH <= _max_crawl_depth and len(peerlist) >= _min_peers_length: - logger.debug("Fetching instance='%s',origin='%s',command='%s',path='%s',_DEPTH=%d ...", instance, domain, command, path, _DEPTH) - fetch_instances(instance, domain, None, command, path) + logger.debug("Fetching peer='%s',origin='%s',command='%s',path='%s',_DEPTH=%d ...", peer, domain, command, path, _DEPTH) + fetch_instances(peer, domain, None, command, path) else: - logger.debug("Adding instance='%s',domain='%s',command='%s',_DEPTH=%d ...", instance, domain, command, _DEPTH) - instances.add(instance, domain, command) + logger.debug("Adding peer='%s',domain='%s',command='%s',_DEPTH=%d ...", peer, domain, command, _DEPTH) + instances.add(peer, domain, command) logger.debug("Checking if domain='%s' has pending updates ...", domain) if instances.has_pending(domain):