Continued:

[fba.git] / fba / networks / lemmy.py
diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py

index 18cf2c474a5c5a35a0e4b91cc1608413a1d23087..7b427fc91ca05db6a85238be3015bb2b76b41294 100644 (file)
--- a/fba/networks/lemmy.py
+++ b/fba/networks/lemmy.py
@@ -20,7 +20,6 @@ import logging
  import bs4
  
  from fba import csrf
-from fba import utils
  
  from fba.helpers import config
  from fba.helpers import domain as domain_helper
@@ -48,8 +47,10 @@ def fetch_peers(domain: str, origin: str) -> list:
          logger.debug("Checking CSRF for domain='%s'", domain)
          headers = csrf.determine(domain, dict())
      except network.exceptions as exception:
-        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
+        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
          instances.set_last_error(domain, exception)
+
+        logger.debug("Returning empty list ... - EXIT!")
          return list()
  
      try:
@@ -145,27 +146,41 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
              logger.debug("doc[]='%s'", type(doc))
  
              found = None
-            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
-                logger.debug("container='%s'", container)
-                headers = doc.findAll("div", container)
-
-                logger.debug("Checking %d header(s) ...", len(headers))
-                for header in headers:
-                    logger.debug("header[]='%s'", type(header))
-                    content = header.find(["h2", "h3", "h4", "h5"]).contents[0]
-
-                    logger.debug("content[%s]='%s'", type(content), content)
-                    if content is None:
-                        logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
-                        continue
-                    elif not isinstance(content, str):
-                        logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
-                        continue
-                    elif content.lower() in translations:
-                        logger.debug("Found header with blocked instances - BREAK!")
-                        found = header
+            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+                logger.debug("criteria='%s'", criteria)
+                containers = doc.findAll("div", criteria)
+
+                logger.debug("Checking %d containers ...", len(containers))
+                for container in containers:
+                    logger.debug("container[]='%s'", type(container))
+                    for header in container.find_all(["h2", "h3", "h4", "h5"]):
+                        content = header
+                        logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
+                        if header is not None:
+                            content = str(header.contents[0])
+                        logger.debug("content[%s]='%s' - AFTER!", type(content), content)
+
+                        if content is None:
+                            logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
+                            continue
+                        elif not isinstance(content, str):
+                            logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
+                            continue
+                        elif content.lower() in translations:
+                            logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
+                            found = header
+                            break
+
+                    logger.debug("found[]='%s'", type(found))
+                    if found is not None:
+                        logger.debug("Found header with blocked instances - BREAK(2) !")
                          break
  
+                logger.debug("found[]='%s'", type(found))
+                if found is not None:
+                    logger.debug("Found header with blocked instances - BREAK(1) !")
+                    break
+
              logger.debug("found[]='%s'", type(found))
              if found is None:
                  logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
@@ -184,7 +199,7 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
                  logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                  return blocklist
  
-            blocking = found.find_next(["ul","table"]).findAll("a")
+            blocking = found.find_next(["ul", "table"]).findAll("a")
              logger.debug("Found %d blocked instance(s) ...", len(blocking))
              for tag in blocking:
                  logger.debug("tag[]='%s'", type(tag))
@@ -194,7 +209,7 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
                  if blocked == "":
                      logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                      continue
-                elif not utils.is_domain_wanted(blocked):
+                elif not domain_helper.is_wanted(blocked):
                      logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                      continue
  
@@ -236,16 +251,16 @@ def fetch_instances(domain: str, origin: str) -> list:
              doc = bs4.BeautifulSoup(response.text, "html.parser")
              logger.debug("doc[]='%s'", type(doc))
  
-            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
-                logger.debug("container='%s'", container)
-                headers = doc.findAll("div", container)
+            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+                logger.debug("criteria='%s'", criteria)
+                containers = doc.findAll("div", criteria)
  
-                logger.debug("Checking %d headers ...", len(headers))
-                for header in headers:
+                logger.debug("Checking %d containers ...", len(containers))
+                for header in containers:
                      logger.debug("header[%s]='%s'", type(header), header)
  
                      rows = header.find_next(["ul","table"]).findAll("a")
-                    logger.debug("Found %d blocked instance(s) ...", len(rows))
+                    logger.debug("Found %d instance(s) ...", len(rows))
                      for tag in rows:
                          logger.debug("tag[]='%s'", type(tag))
                          text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
@@ -255,7 +270,7 @@ def fetch_instances(domain: str, origin: str) -> list:
                          if peer == "":
                              logger.debug("peer is empty - SKIPPED!")
                              continue
-                        elif not utils.is_domain_wanted(peer):
+                        elif not domain_helper.is_wanted(peer):
                              logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                              continue
                          elif peer in peers:
@@ -284,7 +299,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
      logger.debug("doc[]='%s',only='%s' - CALLED!")
      if not isinstance(doc, bs4.BeautifulSoup):
          raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'")
-    elif not isinstance(only, str) and only != None:
+    elif not isinstance(only, str) and only is not None:
          raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
      elif isinstance(only, str) and only == "":
          raise ValueError("Parameter 'only' is empty")
@@ -304,14 +319,14 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
  
          logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
  
-        isoData = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
-        logger.debug("isoData[%s]='%s'", type(isoData), isoData)
+        iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
+        logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)
  
          parsed = None
          try:
-            parsed = json.loads(isoData)
+            parsed = json.loads(iso_data)
          except json.decoder.JSONDecodeError as exception:
-            logger.warning("Exception '%s' during parsing %d Bytes: '%s'", type(exception), len(isoData), str(exception))
+            logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
              return list()
  
          logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
@@ -351,7 +366,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
                  if peer == "":
                      logger.debug("peer is empty - SKIPPED!")
                      continue
-                elif not utils.is_domain_wanted(peer):
+                elif not domain_helper.is_wanted(peer):
                      logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                      continue
                  elif peer in peers: