From 99e074c6332a63c26de35e347f13c75b3651d8d8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Fri, 22 Dec 2023 08:44:52 +0100 Subject: [PATCH] Continued: - need to cut off everything after hash symbol because that is for JavaScript click-event loaded content anyway - prevented a few empty/None strings for invoking tidyup.domain() - improved a few log messages --- fba/http/csrf.py | 4 ++-- fba/http/federation.py | 4 ++-- fba/networks/friendica.py | 7 ++++--- fba/networks/lemmy.py | 4 ++-- fba/networks/mastodon.py | 2 +- fba/networks/misskey.py | 4 ++-- fba/networks/peertube.py | 2 +- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/fba/http/csrf.py b/fba/http/csrf.py index b84d5aa..a66fa02 100644 --- a/fba/http/csrf.py +++ b/fba/http/csrf.py @@ -54,7 +54,7 @@ def determine(domain: str, headers: dict) -> dict: ) logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) - if response.ok and response.status_code == 200 and response.text.strip() != "" and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url): + if response.ok and response.status_code == 200 and response.text.strip() != "" and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url.split("#")[0]): # Save cookies logger.debug("Parsing response.text()=%d Bytes ...", len(response.text)) cookies.store(domain, response.cookies.get_dict()) @@ -71,7 +71,7 @@ def determine(domain: str, headers: dict) -> dict: if tag is not None: logger.debug("Adding CSRF token='%s' for domain='%s'", tag["content"], domain) reqheaders["X-CSRF-Token"] = tag["content"] - elif not domain_helper.is_in_url(domain, response.url): + elif not domain_helper.is_in_url(domain, response.url.split("#")[0]): logger.warning("domain='%s' doesn't match with response.url='%s', maybe redirect to other domain?", domain, response.url) message = f"Redirect from domain='{domain}' to 
response.url='{response.url}'" diff --git a/fba/http/federation.py b/fba/http/federation.py index 355036e..2c35fc6 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -295,7 +295,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: ) logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) - if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url): + if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url.split("#")[0]): logger.debug("Parsing response.text()=%d Bytes ...", len(response.text)) doc = bs4.BeautifulSoup(response.text, "html.parser") @@ -338,7 +338,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: if software is not None and software != "": logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software) instances.set_detection_mode(domain, "SITE_NAME") - elif not domain_helper.is_in_url(domain, response.url): + elif not domain_helper.is_in_url(domain, response.url.split("#")[0]): logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url) components = urlparse(response.url) diff --git a/fba/networks/friendica.py b/fba/networks/friendica.py index f723c71..05efa9a 100644 --- a/fba/networks/friendica.py +++ b/fba/networks/friendica.py @@ -44,7 +44,7 @@ def fetch_blocks(domain: str) -> list: block_tag = None try: - logger.debug("Fetching friendica blocks from domain='%s'", domain) + logger.debug("Fetching friendica blocks from domain='%s' ...", domain) raw = network.fetch_response( domain, "/friendica", @@ -85,10 +85,11 @@ def fetch_blocks(domain: str) -> list: for line in rows: logger.debug("line='%s'", line) blocked = 
line.find_all("td")[0].text - logger.debug("blocked='%s'", blocked) + reason = line.find_all("td")[1].text + logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason) blocked = tidyup.domain(blocked) if blocked != "" else None - reason = tidyup.reason(line.find_all("td")[1].text) + reason = tidyup.reason(reason) if reason != "" else None logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason) if blocked in [None, ""]: diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py index 7d058d5..766e4dd 100644 --- a/fba/networks/lemmy.py +++ b/fba/networks/lemmy.py @@ -83,7 +83,7 @@ def fetch_peers(domain: str, origin: str) -> list: headers = tuple() try: - logger.debug("Checking CSRF for domain='%s'", domain) + logger.debug("Checking CSRF for domain='%s' ...", domain) headers = csrf.determine(domain, dict()) except network.exceptions as exception: logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__) @@ -384,7 +384,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list: continue logger.debug("row[domain]='%s' - BEFORE!", row["domain"]) - peer = tidyup.domain(row["domain"]) + peer = tidyup.domain(row["domain"]) if row["domain"] != "" else None logger.debug("peer='%s' - AFTER!", peer) if peer in [None, ""]: diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index 127c33a..e7d00b6 100644 --- a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -189,7 +189,7 @@ def fetch_blocks(domain: str) -> list: reason = tidyup.reason(block["comment"]) if "comment" in block and block["comment"] is not None and block["comment"] != "" else None - logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s'", domain, block["domain"], reason, block["severity"]) + logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...", domain, block["domain"], reason, block["severity"]) blocklist.append({ "blocker" : domain, "blocked" : block["domain"], diff 
--git a/fba/networks/misskey.py b/fba/networks/misskey.py index 6100504..03b038f 100644 --- a/fba/networks/misskey.py +++ b/fba/networks/misskey.py @@ -297,8 +297,8 @@ def fetch_blocks(domain: str) -> list: for instance in rows: # Is it there? logger.debug("instance[]='%s'", type(instance)) - blocked = tidyup.domain(instance["host"]) - logger.debug("blocked='%s'", blocked) + blocked = tidyup.domain(instance["host"]) if instance["host"] != "" else None + logger.debug("blocked='%s' - AFTER!", blocked) if blocked in [None, ""]: logger.warning("instance[host]='%s' is None or empty after tidyup.domain() - SKIPPED!", instance["host"]) diff --git a/fba/networks/peertube.py b/fba/networks/peertube.py index caf3865..733a5b6 100644 --- a/fba/networks/peertube.py +++ b/fba/networks/peertube.py @@ -43,7 +43,7 @@ def fetch_peers(domain: str) -> list: start = 0 try: - logger.debug("Checking CSRF for domain='%s'", domain) + logger.debug("Checking CSRF for domain='%s' ...", domain) headers = csrf.determine(domain, dict()) except network.exceptions as exception: logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__) -- 2.39.5