From 028c4afcc22a6232051305bdbcc7c01612e55a26 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Roland=20H=C3=A4der?=
Date: Tue, 14 Jan 2025 01:17:39 +0100
Subject: [PATCH] Continued:

- marked api_headers as private
- invoked network.fetch_url() instead of reqto.get() as the former wraps it
  nicely
---
 fba/commands.py         | 34 +++++++++++++++++++++++-----------
 fba/http/network.py     | 10 +++++-----
 fba/networks/pleroma.py |  2 +-
 3 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/fba/commands.py b/fba/commands.py
index 23dc506..3118a63 100644
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -27,7 +27,6 @@ import argparse
 import atoma
 import bs4
 import markdown
-import reqto
 import validators
 
 from fba import database
@@ -512,7 +511,7 @@ def fetch_observer(args: argparse.Namespace) -> int:
     logger.info("Fetching software list ...")
     raw = network.fetch_url(
         f"https://{source_domain}",
-        network.web_headers,
+        headers=network.web_headers,
         timeout=config.timeout
     ).text
     logger.debug("raw[%s]()=%d", type(raw), len(raw))
@@ -649,7 +648,7 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int:
     logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
     raw = network.fetch_url(
         f"https://{source_domain}/todon/domainblocks",
-        network.web_headers,
+        headers=network.web_headers,
         timeout=config.timeout
     ).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
@@ -762,7 +761,7 @@ def fetch_cs(args: argparse.Namespace):
     logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
     raw = network.fetch_url(
         f"https://{source_domain}/federation",
-        network.web_headers,
+        headers=network.web_headers,
         timeout=config.timeout
     ).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
@@ -847,7 +846,11 @@ def fetch_fba_rss(args: argparse.Namespace) -> int:
         sources.update(domain)
 
     logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
-    response = network.fetch_url(args.feed, network.web_headers, config.timeout)
+    response = network.fetch_url(
+        args.feed,
+        headers=network.web_headers,
+        timeout=config.timeout
+    )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
     if response.ok and response.status_code == 200 and len(response.text) > 0:
@@ -926,7 +929,11 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
     domains = []
 
     logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
-    response = network.fetch_url(feed, network.web_headers, config.timeout)
+    response = network.fetch_url(
+        feed,
+        headers=network.web_headers,
+        timeout=config.timeout
+    )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
     if response.ok and response.status_code == 200 and len(response.text) > 0:
@@ -937,6 +944,7 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
         for entry in atom.entries:
             logger.debug("entry[]='%s'", type(entry))
             doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
+            logger.debug("doc[]='%s'", type(doc))
 
             elements = doc.findAll("a")
 
@@ -1154,7 +1162,11 @@ def fetch_txt(args: argparse.Namespace) -> int:
     logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
     for row in blocklists.txt_files:
         logger.debug("Fetching row[url]='%s' ...", row["url"])
-        response = network.fetch_url(row["url"], network.web_headers, config.timeout)
+        response = network.fetch_url(
+            row["url"],
+            headers=network.web_headers,
+            timeout=config.timeout
+        )
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) if response.ok and response.status_code == 200 and response.text != "": @@ -1201,7 +1213,7 @@ def fetch_fedipact(args: argparse.Namespace) -> int: logger.info("Fetching / from source_domain='%s' ...", source_domain) response = network.fetch_url( f"https://{source_domain}", - network.web_headers, + headers=network.web_headers, timeout=config.timeout ) @@ -1260,7 +1272,7 @@ def fetch_joinmobilizon(args: argparse.Namespace) -> int: logger.info("Fetching instances from source_domain='%s' ...", source_domain) raw = network.fetch_url( f"https://{source_domain}/api/v1/instances", - network.web_headers, + headers=network.web_headers, timeout=config.timeout ).text logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw)) @@ -1311,7 +1323,7 @@ def fetch_joinmisskey(args: argparse.Namespace) -> int: logger.info("Fetching instances.json from source_domain='%s' ...", source_domain) raw = network.fetch_url( f"https://{source_domain}/instances.json", - network.web_headers, + headers=network.web_headers, timeout=config.timeout ).text logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw)) @@ -1518,7 +1530,7 @@ def fetch_fedilist(args: argparse.Namespace) -> int: url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not" logger.info("Fetching url='%s' ...", url) - response = reqto.get( + response = network.fetch_url( url, headers=network.web_headers, timeout=config.timeout, diff --git a/fba/http/network.py b/fba/http/network.py index 829e0f5..4b247c9 100644 --- a/fba/http/network.py +++ b/fba/http/network.py @@ -39,7 +39,7 @@ web_headers = { } # HTTP headers for API requests -api_headers = { +_api_headers = { "User-Agent" : config.get("useragent"), "Content-Type": "application/json", } @@ -90,7 +90,7 @@ def post_json_api(domain: str, path: str, data: str = "", headers: dict = None) response = reqto.post( f"https://{domain}{path}", data=data, - headers={**api_headers, **headers}, + headers={**_api_headers, **headers}, timeout=config.timeout, cookies=cookies.get_all(domain), allow_redirects=False @@ -144,7 +144,7 @@ def fetch_api_url(url: str, timeout: tuple) -> dict: try: logger.debug("Fetching url='%s' ...", url) - response = fetch_url(url, api_headers, timeout) + response = fetch_url(url, _api_headers, timeout) logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s'", response.ok, response.status_code, response.reason) if response.ok and response.status_code == 200: @@ -190,7 +190,7 @@ def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict: try: logger.debug("Sending GET to domain='%s',path='%s',timeout(%d)='%s'", domain, path, len(timeout), timeout) - response = _fetch_response(domain, path, {**api_headers, **headers}, timeout) + response = _fetch_response(domain, path, {**_api_headers, **headers}, timeout) except exceptions as exception: logger.debug("Fetching path='%s' from domain='%s' failed. 
exception[%s]='%s'", path, domain, type(exception), str(exception)) json_reply["status_code"] = 999 @@ -270,7 +270,7 @@ def send_bot_post(domain: str, blocklist: list) -> None: "visibility" : config.get("bot_visibility"), "content_type": "text/plain" }, - headers={**api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}}, + headers={**_api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}}, timeout=config.timeout, allow_redirects=False ) diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index 5ca1a93..1d98332 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -375,7 +375,7 @@ def fetch_blocks_from_about(domain: str) -> dict: logger.debug("line[]='%s'", type(line)) tds = line.find_all("td") - logger.debug("tds[%s]()=%d", type(tds) len(tds)) + logger.debug("tds[%s]()=%d", type(tds), len(tds)) blocked = tds[0].text reason = tds[1].text -- 2.39.5