From e66aa094809d60fc6e784f1131015205332366ec Mon Sep 17 00:00:00 2001
From: =?utf8?q?Roland=20H=C3=A4der?= <roland@mxchange.org>
Date: Tue, 11 Jul 2023 15:21:38 +0200
Subject: [PATCH] Continued: - oops, the header was wrong here due to previous
 changes (search for all headers) - but after a few renames, all is back in
 order!

---
 fba/networks/lemmy.py | 49 ++++++++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py
index 3d99838..0b22f53 100644
--- a/fba/networks/lemmy.py
+++ b/fba/networks/lemmy.py
@@ -145,17 +145,18 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
             logger.debug("doc[]='%s'", type(doc))
 
             found = None
-            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
-                logger.debug("container='%s'", container)
-                headers = doc.findAll("div", container)
-
-                logger.debug("Checking %d header(s) ...", len(headers))
-                for header in headers:
-                    logger.debug("header[]='%s'", type(header))
-                    for content in header.find_all(["h2", "h3", "h4", "h5"]): 
-                        logger.debug("content[%s]='%s' - BEFORE!", type(content), content)
-                        if content is not None:
-                            content = str(content.contents[0])
+            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+                logger.debug("criteria='%s'", criteria)
+                containers = doc.findAll("div", criteria)
+
+                logger.debug("Checking %d containers ...", len(containers))
+                for container in containers:
+                    logger.debug("container[]='%s'", type(container))
+                    for header in container.find_all(["h2", "h3", "h4", "h5"]): 
+                        content = header
+                        logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
+                        if header is not None:
+                            content = str(header.contents[0])
                         logger.debug("content[%s]='%s' - AFTER!", type(content), content)
 
                         if content is None:
@@ -165,10 +166,20 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
                             logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                             continue
                         elif content.lower() in translations:
-                            logger.debug("Found header with blocked instances - BREAK!")
+                            logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
                             found = header
                             break
 
+                    logger.debug("found[]='%s'", type(found))
+                    if found is not None:
+                        logger.debug("Found header with blocked instances - BREAK(2) !")
+                        break
+
+                logger.debug("found[]='%s'", type(found))
+                if found is not None:
+                    logger.debug("Found header with blocked instances - BREAK(1) !")
+                    break
+
             logger.debug("found[]='%s'", type(found))
             if found is None:
                 logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
@@ -187,7 +198,7 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
                 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                 return blocklist
 
-            blocking = found.find_next(["ul","table"]).findAll("a")
+            blocking = found.find_next(["ul", "table"]).findAll("a")
             logger.debug("Found %d blocked instance(s) ...", len(blocking))
             for tag in blocking:
                 logger.debug("tag[]='%s'", type(tag))
@@ -239,16 +250,16 @@ def fetch_instances(domain: str, origin: str) -> list:
             doc = bs4.BeautifulSoup(response.text, "html.parser")
             logger.debug("doc[]='%s'", type(doc))
 
-            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
-                logger.debug("container='%s'", container)
-                headers = doc.findAll("div", container)
+            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+                logger.debug("criteria='%s'", criteria)
+                containers = doc.findAll("div", criteria)
 
-                logger.debug("Checking %d headers ...", len(headers))
-                for header in headers:
+                logger.debug("Checking %d containers ...", len(containers))
+                for header in containers:
                     logger.debug("header[%s]='%s'", type(header), header)
 
                     rows = header.find_next(["ul","table"]).findAll("a")
-                    logger.debug("Found %d blocked instance(s) ...", len(rows))
+                    logger.debug("Found %d instance(s) ...", len(rows))
                     for tag in rows:
                         logger.debug("tag[]='%s'", type(tag))
                         text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
-- 
2.39.5