From 90a9f6f442558a351274ffd5763f2813d6d2c2e5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 6 Jun 2023 12:48:08 +0200 Subject: [PATCH] =?utf8?q?Continued:=20-=20don't=20use=20tidyup=5Fdomain()?= =?utf8?q?=20on=20header=20text=20-=20also=20need=20to=20check=20/about/mo?= =?utf8?q?re=C2=A0-=20they=20often=20"hide"=20there=20the=20block=20list?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- fba/fba.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fba/fba.py b/fba/fba.py index 640f9fd..6d93fe0 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -1242,7 +1242,7 @@ def get_mastodon_blocks(domain: str) -> dict: try: doc = bs4.BeautifulSoup( - get_response(domain, "/about", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, + get_response(domain, "/about/more", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, "html.parser", ) except BaseException as e: @@ -1251,8 +1251,9 @@ def get_mastodon_blocks(domain: str) -> dict: return {} for header in doc.find_all("h3"): - header_text = tidyup_domain(header.text) + header_text = tidyup_reason(header.text) + # DEBUG: print(f"DEBUG: header_text='{header_text}'") if header_text in language_mapping: # DEBUG: print(f"DEBUG: header_text='{header_text}'") header_text = language_mapping[header_text] -- 2.39.5