git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Mon, 26 May 2025 02:07:02 +0000 (04:07 +0200)
committer Roland Häder <roland@mxchange.org>
Mon, 26 May 2025 02:07:02 +0000 (04:07 +0200)
- skip non-parseable documents (prevents a possible `AttributeError` on `None`; sketched below the fetch_blocks_from_about() diff)
- pass the `features` argument by name (sketched right after this list)
- remove superfluous trailing commas
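
A minimal sketch of the keyword-argument style this commit adopts, using only `bs4` and illustrative markup:

    import bs4

    raw = "<html><h3 id='limited_servers'>Limited servers</h3></html>"

    # Naming the argument makes it explicit that "html.parser" selects
    # the parser backend rather than reading as another positional value.
    doc = bs4.BeautifulSoup(raw, features="html.parser")
    print(type(doc))  # <class 'bs4.BeautifulSoup'>

Both spellings behave identically; the keyword form just reads unambiguously.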

fba/commands.py
fba/http/federation.py
fba/networks/friendica.py
fba/networks/lemmy.py
fba/networks/mastodon.py
fba/networks/pleroma.py

diff --git a/fba/commands.py b/fba/commands.py
index 1581ddb07d08e2161650e491df780eea8aac078e..32935528d84b2ce99ccec5e72ef1fb725c78f947 100644
@@ -657,7 +657,7 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int:
     ).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
 
-    doc = bs4.BeautifulSoup(raw, "html.parser")
+    doc = bs4.BeautifulSoup(raw, features="html.parser")
     logger.debug("doc[]='%s'", type(doc))
 
     silenced = doc.find("h3", {"id": "limited_servers"}).find_next("ul").findAll("li")
@@ -951,7 +951,7 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
         logger.debug("atom[]='%s'", type(atom))
         for entry in atom.entries:
             logger.debug("entry[]='%s'", type(entry))
-            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
+            doc = bs4.BeautifulSoup(entry.content.value, features="html.parser")
 
             logger.debug("doc[]='%s'", type(doc))
             elements = doc.findAll("a")
@@ -1210,7 +1210,7 @@ def fetch_fedipact(args: argparse.Namespace) -> int:
     if response.ok and response.status_code == 200 and response.text != "":
         logger.debug("Parsing %d Bytes ...", len(response.text))
 
-        doc = bs4.BeautifulSoup(response.text, "html.parser")
+        doc = bs4.BeautifulSoup(response.text, features="html.parser")
         logger.debug("doc[]='%s'", type(doc))
 
         rows = doc.findAll("li")
diff --git a/fba/http/federation.py b/fba/http/federation.py
index 6be9822991adf3966e1f984018867b057d9c4b34..c9cd446174cf3a24086daf3c8e9cb0b8aaf4a8a7 100644
@@ -316,7 +316,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
 
     if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url):
         logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
-        doc = bs4.BeautifulSoup(response.text, "html.parser")
+        doc = bs4.BeautifulSoup(response.text, features="html.parser")
 
         logger.debug("doc[]='%s'", type(doc))
         platform  = doc.find("meta", {"property": "og:platform"})
diff --git a/fba/networks/friendica.py b/fba/networks/friendica.py
index e5c94cc3e8038653cdea02d1a74bba662b42c74f..f80d1d82f1496c471f160cf2d3a5cb6304cebdbc 100644
@@ -50,7 +50,7 @@ def fetch_blocks(domain: str) -> list:
         ).text
         logger.debug("Parsing %d Bytes ...", len(raw))
 
-        doc = bs4.BeautifulSoup(raw, "html.parser",)
+        doc = bs4.BeautifulSoup(raw, features="html.parser")
         logger.debug("doc[]='%s'", type(doc))
 
         block_tag = doc.find(id="about_blocklist")
diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py
index da30014295edf97564217b5212367d5ce8593217..4ba2392dd9b4f43dff2be5232996e9a9f881510d 100644
@@ -150,7 +150,7 @@ def fetch_blocks(domain: str) -> list:
         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
         if response.ok and response.status_code == 200 and response.text != "":
             logger.debug("Parsing %s Bytes ...", len(response.text))
-            doc = bs4.BeautifulSoup(response.text, "html.parser")
+            doc = bs4.BeautifulSoup(response.text, features="html.parser")
             logger.debug("doc[]='%s'", type(doc))
 
             found = None
@@ -259,7 +259,7 @@ def fetch_instances(domain: str, origin: str) -> list:
         if response.ok and response.status_code == 200 and response.text != "":
             logger.debug("Parsing %s Bytes ...", len(response.text))
 
-            doc = bs4.BeautifulSoup(response.text, "html.parser")
+            doc = bs4.BeautifulSoup(response.text, features="html.parser")
             logger.debug("doc[]='%s'", type(doc))
 
             for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py
index c4654c8c98a1cbb813e05157e4baf52d24e5a1ba..0500d957da7f9def2126e5db1c9af03c32f7df93 100644
@@ -88,10 +88,13 @@ def fetch_blocks_from_about(domain: str) -> dict:
                     domain,
                     path
                 ).text,
-                "html.parser",
+                "html.parser"
             )
-
-            if len(doc.find_all("h3")) > 0:
+            logger.debug("doc[]='%s'", type(doc))
+            if doc is None:
+                logger.warning("domain='%s',path='%s' has returned no parseable document! - BREAK!", domain, path)
+                break
+            elif len(doc.find_all("h3")) > 0:
                 logger.debug("path='%s' had some headlines - BREAK!", path)
                 break
 
@@ -100,6 +103,11 @@ def fetch_blocks_from_about(domain: str) -> dict:
             instances.set_last_error(domain, exception)
             break
 
+    logger.debug("doc[]='%s'", type(doc))
+    if doc is None:
+        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
+        return []
+
     blocklist = {
         "suspended servers": [],
         "filtered media"   : [],
@@ -107,11 +115,6 @@ def fetch_blocks_from_about(domain: str) -> dict:
         "silenced servers" : [],
     }
 
-    logger.debug("doc[]='%s'", type(doc))
-    if doc is None:
-        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
-        return []
-
     headers = doc.find_all("h3")
 
     logger.info("Checking %d h3 headers ...", len(headers))
diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py
index 7cfbd9100012224fe9e79d2c2ea67e8682f0eb45..b336d929ebbb00e206e2c45d03b8835e615e8365 100644
@@ -319,7 +319,7 @@ def fetch_blocks_from_about(domain: str) -> dict:
             logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
             doc = bs4.BeautifulSoup(
                 response.text,
-                "html.parser",
+                "html.parser"
             )
 
             logger.debug("doc[]='%s'", type(doc))