Continued:

author Roland Häder <roland@mxchange.org>

Sun, 20 Apr 2025 23:55:31 +0000 (01:55 +0200)

committer Roland Häder <roland@mxchange.org>

Sun, 20 Apr 2025 23:55:31 +0000 (01:55 +0200)
author Roland Häder <roland@mxchange.org>
Sun, 20 Apr 2025 23:55:31 +0000 (01:55 +0200)
committer Roland Häder <roland@mxchange.org>
Sun, 20 Apr 2025 23:55:31 +0000 (01:55 +0200)
diff --git a/fba/commands.py b/fba/commands.py

index 9730c2acfaf9ac46d367ed55eaba1a3968334602..011cae1c0d1a3c5bbb26c4c9ba73cf569e622209 100644 (file)
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -19,8 +19,7 @@ import json
  import logging
  import numpy
  import time
-
-from urllib.parse import urlparse
+import urllib
  
  import argparse
  import atoma
@@ -838,16 +837,16 @@ def fetch_fba_rss(args: argparse.Namespace) -> int:
      logger.debug("Invoking locking.acquire() ...")
      locking.acquire()
  
-    components = urlparse(args.feed)
-    domain = components.netloc.lower().split(":")[0]
+    components = urllib.parse.urlparse(args.feed)
+    hostname = components.netloc.lower().split(":")[0]
  
-    logger.debug("domain='%s'", domain)
-    if sources.is_recent(domain):
-        logger.info("API from domain='%s' has recently being accessed - EXIT!", domain)
+    logger.debug("hostname='%s'", hostname)
+    if sources.is_recent(hostname):
+        logger.info("API from hostname='%s' has recently being accessed - EXIT!", hostname)
          return 0
      else:
-        logger.debug("domain='%s' has not been recently used, marking ...", domain)
-        sources.update(domain)
+        logger.debug("hostname='%s' has not been recently used, marking ...", hostname)
+        sources.update(hostname)
  
      logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
      response = network.fetch_url(
@@ -865,16 +864,21 @@ def fetch_fba_rss(args: argparse.Namespace) -> int:
          for item in rss.items:
              logger.debug("item[%s]='%s'", type(item), item)
              domain = item.link.split("=")[1]
+
+            logger.debug("domain='%s',tidyup - BEFORE!", domain)
              domain = tidyup.domain(domain) if domain not in[None, ""] else None
-            logger.debug("domain='%s' - AFTER!", domain)
+            logger.debug("domain='%s',tidyup - AFTER!", domain)
  
              if domain in [None, ""]:
                  logger.debug("domain[%s]='%s' is empty after tidyup.domain() - SKIPPED!", type(domain), domain)
                  continue
+            elif not domain_helper.is_wanted(domain):
+                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+                continue
  
-            logger.debug("domain='%s' - BEFORE!", domain)
+            logger.debug("domain='%s',idna - BEFORE!", domain)
              domain = domain_helper.encode_idna(domain)
-            logger.debug("domain='%s' - AFTER!", domain)
+            logger.debug("domain='%s',idna - AFTER!", domain)
  
              if not domain_helper.is_wanted(domain):
                  logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
@@ -921,7 +925,7 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
      if args.feed is not None and validators.url(args.feed):
          logger.debug("Setting feed='%s' ...", args.feed)
          feed = str(args.feed)
-        source_domain = urlparse(args.feed).netloc
+        source_domain = urllib.parse.urlparse(args.feed).netloc
  
      if sources.is_recent(source_domain):
          logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
@@ -1740,7 +1744,7 @@ def fetch_relaylist(args: argparse.Namespace) -> int:
              continue
  
          logger.debug("row[url]='%s' - BEFORE!", row["url"])
-        domain = urlparse(row["url"]).netloc.lower().split(":")[0]
+        domain = urllib.parse.urlparse(row["url"]).netloc.lower().split(":")[0]
          logger.debug("domain='%s' - AFTER!", domain)
  
          if domain in [None, ""]:
@@ -1776,6 +1780,9 @@ def fetch_relays(args: argparse.Namespace) -> int:
      logger.debug("Invoking locking.acquire() ...")
      locking.acquire()
  
+    # Init domain list
+    domains = []
+
      if args.domain not in [None, ""]:
          logger.debug("Fetching instances record for args.domain='%s' ...", args.domain)
          database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
@@ -1786,7 +1793,6 @@ def fetch_relays(args: argparse.Namespace) -> int:
          logger.debug("Fetch all relay instances ...")
          database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL ORDER BY last_updated DESC")
  
-    domains = []
      rows = database.cursor.fetchall()
  
      logger.info("Checking %d relays ...", len(rows))
@@ -1796,7 +1802,10 @@ def fetch_relays(args: argparse.Namespace) -> int:
              logger.debug("row[domain]='%s' has recently been fetched - SKIPPED!", row["domain"])
              continue
  
+        # Init variables
          peers = []
+        doc = None
+
          try:
              logger.debug("row[domain]='%s',row[software]='%s' - checking ....", row["domain"], row["software"])
              if row["software"] == "pub-relay":
@@ -1909,7 +1918,7 @@ def fetch_relays(args: argparse.Namespace) -> int:
                      logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
                      continue
  
-                components = urlparse(link.get("href"))
+                components = urllib.parse.urlparse(link.get("href"))
                  logger.debug("components(%d)='%s'", len(components), components)
                  domain = components.netloc.lower().split(":")[0]
  
diff --git a/fba/helpers/domain.py b/fba/helpers/domain.py

index 2daf419949d2cc6964dc39cd47c832f25ca0baf5..965796a08a166aadd8ecc833cd0498f8b17d83dd 100644 (file)
--- a/fba/helpers/domain.py
+++ b/fba/helpers/domain.py
@@ -15,9 +15,9 @@
  # along with this program.  If not, see <https://www.gnu.org/licenses/>.
  
  import logging
+import urllib
  
  from functools import lru_cache
-from urllib.parse import urlparse
  
  import validators
  
@@ -71,7 +71,7 @@ def is_in_url(domain: str, url: str) -> bool:
      punycode = encode_idna(domain)
      logger.debug("punycode='%s'", punycode)
  
-    components = urlparse(url)
+    components = urllib.parse.urlparse(url)
      logger.debug("components[]='%s',punycode='%s'", type(components), punycode)
  
      is_found = (punycode in [components.netloc, components.hostname])
@@ -147,7 +147,7 @@ def encode_idna(domain: str) -> str:
      logger.debug("punycode='%s' - AFTER!", punycode)
  
      if "/" in punycode:
-        components = urlparse("https://" + punycode)
+        components = urllib.parse.urlparse("https://" + punycode)
          logger.debug("components[%s](%d)='%s'", type(components), len(components), components)
  
          punycode = components.netloc.encode("idna").decode("utf-8") + components.path
diff --git a/fba/http/federation.py b/fba/http/federation.py

index 7a8b4ce79d738f1b3ca0dcb5b2c2211eba6ee764..0b9eced0ef1a5fd1a0c965a8ca65dfa024092c0e 100644 (file)
--- a/fba/http/federation.py
+++ b/fba/http/federation.py
@@ -14,8 +14,7 @@
  # along with this program.  If not, see <https://www.gnu.org/licenses/>.
  
  import logging
-
-from urllib.parse import urlparse
+import urllib
  
  import bs4
  import requests
@@ -159,7 +158,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path:
                  logger.warning("instance[url]='%s' is not a valid URL - SKIPPED!", instance["url"])
                  continue
  
-            components = urlparse(instance["url"])
+            components = urllib.parse.urlparse(instance["url"])
              logger.debug("components[%s]()=%d", type(components), len(components))
  
              instance = components.netloc.lower().split(":")[0]
@@ -366,7 +365,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
      elif not domain_helper.is_in_url(domain, response_url):
          logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)
  
-        components = urlparse(response.url)
+        components = urllib.parse.urlparse(response.url)
          domain2 = components.netloc.lower().split(":")[0]
  
          logger.debug("domain2='%s'", domain2)
diff --git a/fba/http/network.py b/fba/http/network.py

index bf121beddfebb78fa888868e696d620336762129..006234f0ffc9c928bff0e45f23536e69d67b587a 100644 (file)
--- a/fba/http/network.py
+++ b/fba/http/network.py
@@ -17,8 +17,7 @@
  import csv
  import logging
  import time
-
-from urllib.parse import urlparse
+import urllib
  
  import eventlet
  import reqto
@@ -348,7 +347,7 @@ def fetch_url(url: str, headers: dict, timeout: tuple, allow_redirects: bool = T
          raise TypeError(f"Parameter allow_redirects[]='{type(allow_redirects)}' has not expected type 'bool'")
  
      logger.debug("Parsing url='%s' ...", url)
-    components = urlparse(url)
+    components = urllib.parse.urlparse(url)
  
      # Invoke other function, avoid trailing ?
      logger.debug("components[%s]='%s'", type(components), components)
@@ -365,7 +364,7 @@ def fetch_url(url: str, headers: dict, timeout: tuple, allow_redirects: bool = T
          logger.debug("Fetching path='%s' from netloc='%s' ...", components.path, components.netloc)
          response = _fetch_response(
              components.netloc.split(":")[0],
-            components.path if isinstance(components.path, str) and components.path != '' else '/',
+            components.path if isinstance(components.path, str) and components.path != "" else "/",
              headers=headers,
              timeout=timeout,
              allow_redirects=allow_redirects
diff --git a/fba/http/nodeinfo.py b/fba/http/nodeinfo.py

index 6d0995ec7dfc6e58098d353578c0a0548d2264e3..b656ff4385dd4dbf764211ba21f0a17055b5e741 100644 (file)
--- a/fba/http/nodeinfo.py
+++ b/fba/http/nodeinfo.py
@@ -14,10 +14,9 @@
  # along with this program.  If not, see <https://www.gnu.org/licenses/>.
  
  import logging
+import urllib
  import validators
  
-from urllib.parse import urlparse
-
  from fba.helpers import blacklist
  from fba.helpers import config
  from fba.helpers import domain as domain_helper
@@ -227,17 +226,17 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
                  url = link["href"].lower()
  
                  logger.debug("Parsing url='%s' ...", url)
-                components = urlparse(url)
+                components = urllib.parse.urlparse(url)
  
                  logger.debug("components[%s]='%s'", type(components), components)
                  if components.scheme == "" and components.netloc == "":
                      logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
                      url = f"https://{domain}{url}"
-                    components = urlparse(url)
+                    components = urllib.parse.urlparse(url)
                  elif components.netloc == "":
                      logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain)
                      url = f"{components.scheme}://{domain}{components.path}"
-                    components = urlparse(url)
+                    components = urllib.parse.urlparse(url)
  
                  domain2 = components.netloc.lower().split(":")[0]
                  logger.debug("domain2='%s'", domain2)
author	Roland Häder <roland@mxchange.org>
	Sun, 20 Apr 2025 23:55:31 +0000 (01:55 +0200)
committer	Roland Häder <roland@mxchange.org>
	Sun, 20 Apr 2025 23:55:31 +0000 (01:55 +0200)
fba/commands.py		patch \| blob \| history
fba/helpers/domain.py		patch \| blob \| history
fba/http/federation.py		patch \| blob \| history
fba/http/network.py		patch \| blob \| history
fba/http/nodeinfo.py		patch \| blob \| history