From: Roland Häder Date: Tue, 6 Jun 2023 10:48:08 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=90a9f6f442558a351274ffd5763f2813d6d2c2e5;p=fba.git Continued: - don't use tidyup_domain() on header text - also need to check /about/more - they often "hide" the block list there --- diff --git a/fba/fba.py b/fba/fba.py index 640f9fd..6d93fe0 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -1242,7 +1242,7 @@ def get_mastodon_blocks(domain: str) -> dict: try: doc = bs4.BeautifulSoup( - get_response(domain, "/about", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, + get_response(domain, "/about/more", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, "html.parser", ) except BaseException as e: @@ -1251,8 +1251,9 @@ def get_mastodon_blocks(domain: str) -> dict: return {} for header in doc.find_all("h3"): - header_text = tidyup_domain(header.text) + header_text = tidyup_reason(header.text) + # DEBUG: print(f"DEBUG: header_text='{header_text}'") if header_text in language_mapping: # DEBUG: print(f"DEBUG: header_text='{header_text}'") header_text = language_mapping[header_text]