From b1067e2ed998e0651d1fe1ccf5fb361b0aaceb3d Mon Sep 17 00:00:00 2001
From: =?utf8?q?Roland=20H=C3=A4der?=
Date: Sun, 27 Aug 2023 16:27:54 +0200
Subject: [PATCH] Continued:

- let's try it stricter: 200 OK is the only HTTP status code we accept
- log failing fetch of /instances from a Lemmy instance
---
 fba/commands.py        | 8 ++++----
 fba/csrf.py            | 2 +-
 fba/http/federation.py | 2 +-
 fba/http/network.py    | 2 +-
 fba/networks/lemmy.py  | 10 ++++++++--
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/fba/commands.py b/fba/commands.py
index e1ffd37..9775189 100644
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -778,7 +778,7 @@ def fetch_fba_rss(args: argparse.Namespace) -> int:
     response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code < 300 and len(response.text) > 0:
+    if response.ok and response.status_code == 200 and len(response.text) > 0:
         logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
 
         rss = atoma.parse_rss_bytes(response.content)
@@ -856,7 +856,7 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
     response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code < 300 and len(response.text) > 0:
+    if response.ok and response.status_code == 200 and len(response.text) > 0:
         logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
 
         atom = atoma.parse_atom_bytes(response.content)
@@ -1146,7 +1146,7 @@ def fetch_txt(args: argparse.Namespace) -> int:
         response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-        if response.ok and response.status_code < 300 and response.text != "":
+        if response.ok and response.status_code == 200 and response.text != "":
             logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
 
             domains = response.text.split("\n")
@@ -1199,7 +1199,7 @@ def fetch_fedipact(args: argparse.Namespace) -> int:
     )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code < 300 and response.text != "":
+    if response.ok and response.status_code == 200 and response.text != "":
         logger.debug("Parsing %d Bytes ...", len(response.text))
 
         doc = bs4.BeautifulSoup(response.text, "html.parser")
diff --git a/fba/csrf.py b/fba/csrf.py
index f529af3..eadbc0e 100644
--- a/fba/csrf.py
+++ b/fba/csrf.py
@@ -50,7 +50,7 @@ def determine(domain: str, headers: dict) -> dict:
     )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code < 300 and response.text.strip() != "" and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
+    if response.ok and response.status_code == 200 and response.text.strip() != "" and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
         # Save cookies
         logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
         cookies.store(domain, response.cookies.get_dict())
diff --git a/fba/http/federation.py b/fba/http/federation.py
index 8440f40..7a6e7e7 100644
--- a/fba/http/federation.py
+++ b/fba/http/federation.py
@@ -268,7 +268,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
     )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if ((response.ok and response.status_code < 300) or response.status_code == 410) and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
+    if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
         logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
 
         doc = bs4.BeautifulSoup(response.text, "html.parser")
diff --git a/fba/http/network.py b/fba/http/network.py
index b16cd15..6834de0 100644
--- a/fba/http/network.py
+++ b/fba/http/network.py
@@ -235,7 +235,7 @@ def send_bot_post(domain: str, blocklist: list):
     )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    return response.ok and response.status_code < 300 and response.text.strip() != ""
+    return response.ok and response.status_code == 200 and response.text.strip() != ""
 
 def fetch_response(domain: str, path: str, headers: dict, timeout: tuple, allow_redirects: bool = False) -> requests.models.Response:
     logger.debug("domain='%s',path='%s',headers()=%d,timeout='%s',allow_redirects='%s' - CALLED!", domain, path, len(headers), timeout, allow_redirects)
diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py
index 6a3ed10..31f3019 100644
--- a/fba/networks/lemmy.py
+++ b/fba/networks/lemmy.py
@@ -139,7 +139,7 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
         )
 
         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-        if response.ok and response.status_code < 300 and response.text != "":
+        if response.ok and response.status_code == 200 and response.text != "":
             logger.debug("Parsing %s Bytes ...", len(response.text))
 
             doc = bs4.BeautifulSoup(response.text, "html.parser")
@@ -217,6 +217,9 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
                         "reason"     : None,
                         "block_level": "reject",
                     })
+        else:
+            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
+            instances.set_last_error(domain, response)
 
     except network.exceptions as exception:
         logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
@@ -242,7 +245,7 @@ def fetch_instances(domain: str, origin: str) -> list:
         )
 
         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-        if response.ok and response.status_code < 300 and response.text != "":
+        if response.ok and response.status_code == 200 and response.text != "":
             logger.debug("Parsing %s Bytes ...", len(response.text))
 
             doc = bs4.BeautifulSoup(response.text, "html.parser")
@@ -278,6 +281,9 @@ def fetch_instances(domain: str, origin: str) -> list:
             if len(peers) == 0:
                 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                 peers = parse_script(doc)
+        else:
+            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
+            instances.set_last_error(domain, response)
 
         logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
         instances.set_success(domain)
-- 
2.39.5
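
Note: every call site in this patch repeats the same strict guard, `response.ok and response.status_code == 200` plus a non-empty-body check. A follow-up could factor that into one shared predicate. A minimal sketch, assuming `requests` response objects as used throughout the codebase; the helper name `is_response_okay` is hypothetical and not part of this patch:

    import requests

    def is_response_okay(response: requests.models.Response) -> bool:
        # Hypothetical helper, not in this patch: the strict acceptance
        # check this patch introduces at each call site. Only a 200 OK
        # with a non-empty body counts as a usable response.
        return response.ok and response.status_code == 200 and response.text.strip() != ""

Call sites such as fetch_instances() could then reduce to "if is_response_okay(response): ... else: instances.set_last_error(domain, response)", keeping the stricter policy in one place.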