git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Tue, 11 Jul 2023 06:14:33 +0000 (08:14 +0200)
committer Roland Häder <roland@mxchange.org>
Tue, 11 Jul 2023 06:14:33 +0000 (08:14 +0200)
- find more blocklists/peer lists from Lemmy by also scanning <div> elements
  with the (out-dated?) class=container

fba/networks/lemmy.py

index 1306bb8bf962f176687624855fefc9ae40aa248d..18cf2c474a5c5a35a0e4b91cc1608413a1d23087 100644 (file)
@@ -144,24 +144,27 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
             doc = bs4.BeautifulSoup(response.text, "html.parser")
             logger.debug("doc[]='%s'", type(doc))
 
-            headers = doc.findAll("div", {"class": "home-instances container-lg"})
             found = None
-            logger.debug("Checking %d header(s) ...", len(headers))
-            for header in headers:
-                logger.debug("header[]='%s'", type(header))
-                content = header.contents[0]
-
-                logger.debug("content[%s]='%s'", type(content), content)
-                if content is None:
-                    logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
-                    continue
-                elif not isinstance(content, str):
-                    logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
-                    continue
-                elif content.lower() in translations:
-                    logger.debug("Found header with blocked instances - BREAK!")
-                    found = header
-                    break
+            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+                logger.debug("container='%s'", container)
+                headers = doc.findAll("div", container)
+
+                logger.debug("Checking %d header(s) ...", len(headers))
+                for header in headers:
+                    logger.debug("header[]='%s'", type(header))
+                    content = header.find(["h2", "h3", "h4", "h5"]).contents[0]
+
+                    logger.debug("content[%s]='%s'", type(content), content)
+                    if content is None:
+                        logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
+                        continue
+                    elif not isinstance(content, str):
+                        logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
+                        continue
+                    elif content.lower() in translations:
+                        logger.debug("Found header with blocked instances - BREAK!")
+                        found = header
+                        break
 
             logger.debug("found[]='%s'", type(found))
             if found is None:
@@ -233,31 +236,34 @@ def fetch_instances(domain: str, origin: str) -> list:
             doc = bs4.BeautifulSoup(response.text, "html.parser")
             logger.debug("doc[]='%s'", type(doc))
 
-            headers = doc.findAll("div", {"class": "home-instances container-lg"})
-            logger.debug("Checking %d headers ...", len(headers))
-            for header in headers:
-                logger.debug("header[%s]='%s'", type(header), header)
-
-                rows = header.find_next(["ul","table"]).findAll("a")
-                logger.debug("Found %d blocked instance(s) ...", len(rows))
-                for tag in rows:
-                    logger.debug("tag[]='%s'", type(tag))
-                    text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
-                    peer = tidyup.domain(text)
-                    logger.debug("peer='%s'", peer)
-
-                    if peer == "":
-                        logger.debug("peer is empty - SKIPPED!")
-                        continue
-                    elif not utils.is_domain_wanted(peer):
-                        logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
-                        continue
-                    elif peer in peers:
-                        logger.debug("peer='%s' already added - SKIPPED!", peer)
-                        continue
-
-                    logger.debug("Appending peer='%s' ...", peer)
-                    peers.append(peer)
+            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+                logger.debug("container='%s'", container)
+                headers = doc.findAll("div", container)
+
+                logger.debug("Checking %d headers ...", len(headers))
+                for header in headers:
+                    logger.debug("header[%s]='%s'", type(header), header)
+
+                    rows = header.find_next(["ul","table"]).findAll("a")
+                    logger.debug("Found %d blocked instance(s) ...", len(rows))
+                    for tag in rows:
+                        logger.debug("tag[]='%s'", type(tag))
+                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
+                        peer = tidyup.domain(text)
+                        logger.debug("peer='%s'", peer)
+
+                        if peer == "":
+                            logger.debug("peer is empty - SKIPPED!")
+                            continue
+                        elif not utils.is_domain_wanted(peer):
+                            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
+                            continue
+                        elif peer in peers:
+                            logger.debug("peer='%s' already added - SKIPPED!", peer)
+                            continue
+
+                        logger.debug("Appending peer='%s' ...", peer)
+                        peers.append(peer)
 
             logger.debug("peers()=%d", len(peers))
             if len(peers) == 0: