git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Sat, 16 Dec 2023 08:51:12 +0000 (09:51 +0100)
committer Roland Häder <roland@mxchange.org>
Sat, 16 Dec 2023 08:56:08 +0000 (09:56 +0100)
- added missing 'continue'
- added some debug messages
- skip empty/NoneType domain names

fba/commands.py
fba/networks/lemmy.py

index 2d2635703a0e391882c5b31e0c9f618bb9a22b20..5ddb44adecf267782e13f72ae1b9af18e169ca90 100644 (file)
@@ -1611,7 +1611,7 @@ def fetch_instances_social(args: argparse.Namespace) -> int:
             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
             continue
 
-        logger.info("Fetching instances from domain='%s'", domain)
+        logger.info("Fetching instances from domain='%s' ...", domain)
         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
 
     logger.debug("Success! - EXIT!")
index d216cf8cb461c74924ce768f22dabfb8c6f4ee1d..7d058d596e0bb381286b1cb8a40fa0d973e41501 100644 (file)
@@ -35,7 +35,7 @@ logger = logging.getLogger(__name__)
 #logger.setLevel(logging.DEBUG)
 
 # Lemmy translations
-translations = [
+_translations = [
     "Blocked Instances".lower(),
     "Instàncies bloquejades".lower(),
     "Blocáilte Ásc".lower(),
@@ -113,6 +113,7 @@ def fetch_peers(domain: str, origin: str) -> list:
             logger.debug("Marking domain='%s' as successfully handled ...", domain)
             instances.set_success(domain)
 
+        logger.debug("peers()=%d", len(peers))
         if len(peers) == 0:
             logger.debug("Fetching instances for domain='%s' from /instances ...", domain)
             peers = fetch_instances(domain, origin)
@@ -173,7 +174,7 @@ def fetch_blocks(domain: str) -> list:
                         elif not isinstance(content, str):
                             logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                             continue
-                        elif content.lower() in translations:
+                        elif content.lower() in _translations:
                             logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
                             found = header
                             break
@@ -278,7 +279,7 @@ def fetch_instances(domain: str, origin: str) -> list:
                         logger.debug("tag[]='%s'", type(tag))
                         text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
 
-                        logger.debug("text='%s' - BEFORE!", text)
+                        logger.debug("text[%s]='%s' - BEFORE!", type(text), text)
                         peer = tidyup.domain(text) if text != "" else None
                         logger.debug("peer='%s' - AFTER", peer)
 
@@ -299,6 +300,7 @@ def fetch_instances(domain: str, origin: str) -> list:
             if len(peers) == 0:
                 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                 peers = parse_script(doc)
+                logger.debug("Parsing doc()=%d returned %d peer(s).", len(doc), len(peers))
         else:
             logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
             instances.set_last_error(domain, response)
@@ -366,7 +368,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
         data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
         logger.debug("Checking %d data elements ...", len(data))
         for element in data:
-            logger.debug("element='%s'", element)
+            logger.debug("element[%s]='%s'", type(element), element)
             if isinstance(only, str) and only != element:
                 logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
                 continue
@@ -377,6 +379,9 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
                 if "domain" not in row:
                     logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
                     continue
+                elif row["domain"] in [None, ""]:
+                    logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
+                    continue
 
                 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
                 peer = tidyup.domain(row["domain"])
@@ -387,6 +392,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
                     continue
                 elif not domain_helper.is_wanted(peer):
                     logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
+                    continue
                 elif peer in peers:
                     logger.debug("peer='%s' already added - SKIPPED!", peer)
                     continue