From: Roland Häder
Date: Mon, 26 May 2025 02:07:02 +0000 (+0200)
Subject: Continued:
X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=0b8682c9e0daa296f5437ea6e02ac262b30b45d2;p=fba.git

Continued:
- skip non-parseable documents (prevents a possible `None` error)
- name the `features` parameter directly
- remove superfluous commas
---
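Note on the first change: `doc` is only assigned inside the retry loop, so it
stays `None` when no /about path could be fetched and parsed, and any later
`doc.find_all()` call would crash. A minimal sketch of the guard pattern,
using a hypothetical fetch_text() helper in place of fba's own network module:

    import bs4
    import urllib.request

    def fetch_text(domain: str, path: str) -> str:
        # hypothetical helper; fba uses its own network module instead
        with urllib.request.urlopen(f"https://{domain}{path}", timeout=10) as response:
            return response.read().decode("utf-8", errors="replace")

    def first_parseable_doc(domain: str, paths: list):
        doc = None
        for path in paths:
            try:
                doc = bs4.BeautifulSoup(fetch_text(domain, path), features="html.parser")
                if len(doc.find_all("h3")) > 0:
                    # found headlines, this page is usable
                    break
            except Exception:
                # fetch or parse failed, try the next path
                continue
        # doc stays None when every path failed; callers must check
        # for that before invoking doc.find_all() and friends
        return doc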
logger.debug("Parsing %s Bytes ...", len(response.text)) - doc = bs4.BeautifulSoup(response.text, "html.parser") + doc = bs4.BeautifulSoup(response.text, features="html.parser") logger.debug("doc[]='%s'", type(doc)) for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]: diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index c4654c8..0500d95 100644 --- a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -88,10 +88,13 @@ def fetch_blocks_from_about(domain: str) -> dict: domain, path ).text, - "html.parser", + "html.parser" ) - - if len(doc.find_all("h3")) > 0: + logger.debug("doc[]='%s'", type(doc)) + if doc is None: + logger.warning("domain='%s',path='%s' has returned no parseable document! - BREAK!", domain, path) + break + elif len(doc.find_all("h3")) > 0: logger.debug("path='%s' had some headlines - BREAK!", path) break @@ -100,6 +103,11 @@ def fetch_blocks_from_about(domain: str) -> dict: instances.set_last_error(domain, exception) break + logger.debug("doc[]='%s'", type(doc)) + if doc is None: + logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain) + return [] + blocklist = { "suspended servers": [], "filtered media" : [], @@ -107,11 +115,6 @@ def fetch_blocks_from_about(domain: str) -> dict: "silenced servers" : [], } - logger.debug("doc[]='%s'", type(doc)) - if doc is None: - logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain) - return [] - headers = doc.find_all("h3") logger.info("Checking %d h3 headers ...", len(headers)) diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index 7cfbd91..b336d92 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -319,7 +319,7 @@ def fetch_blocks_from_about(domain: str) -> dict: logger.debug("Parsing response.text()=%d Bytes ...", len(response.text)) doc = bs4.BeautifulSoup( response.text, - "html.parser", + "html.parser" ) logger.debug("doc[]='%s'", type(doc))