From 413f462f772e139020981ca020924be4682e86a0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Fri, 9 Jun 2023 05:45:04 +0200 Subject: [PATCH] Continued: - moved headers,api_headers to fba.network module - fixed more references --- fba/boot.py | 1 + fba/commands.py | 8 ++++---- fba/fba.py | 23 ++++++----------------- fba/federation/lemmy.py | 2 +- fba/federation/mastodon.py | 8 ++++---- fba/federation/peertube.py | 2 +- fba/network.py | 14 +++++++++++++- 7 files changed, 30 insertions(+), 28 deletions(-) diff --git a/fba/boot.py b/fba/boot.py index fcdef23..d0bc456 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -19,6 +19,7 @@ import os import sys import tempfile import zc.lockfile + from fba import commands from fba import fba diff --git a/fba/commands.py b/fba/commands.py index 1db0c08..98d8ba6 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -284,7 +284,7 @@ def fetch_cs(args: argparse.Namespace): } try: - raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text + raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}") doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser') @@ -331,7 +331,7 @@ def fetch_fba_rss(args: argparse.Namespace): try: print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...") - response = fba.fetch_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))) + response = fba.fetch_url(args.feed, network.headers, (config.get("connection_timeout"), config.get("read_timeout"))) # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") if response.ok and response.status_code < 300 and len(response.text) > 0: @@ -378,7 +378,7 @@ def fetch_fbabot_atom(args: argparse.Namespace): domains = list() try: print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...") - response = fba.fetch_url(feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))) + response = fba.fetch_url(feed, network.headers, (config.get("connection_timeout"), config.get("read_timeout"))) # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") if response.ok and response.status_code < 300 and len(response.text) > 0: @@ -458,7 +458,7 @@ def fetch_federater(args: argparse.Namespace): boot.acquire_lock() # Fetch this URL - response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))) + response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.headers, (config.get("connection_timeout"), config.get("read_timeout"))) # DEBUG: print(f"DEBUG: response[]='{type(response)}'") if response.ok and response.content != "": # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...") diff --git a/fba/fba.py b/fba/fba.py index 7f0c140..95c916c 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -51,17 +51,6 @@ nodeinfo_identifier = [ "http://nodeinfo.diaspora.software/ns/schema/1.0", ] -# HTTP headers for non-API requests -headers = { - "User-Agent": config.get("useragent"), -} - -# HTTP headers for API requests -api_headers = { - "User-Agent": config.get("useragent"), - "Content-Type": "application/json", -} - # Connect to database connection = sqlite3.connect("blocks.db") cursor = connection.cursor() @@ -371,14 +360,14 @@ def fetch_peers(domain: str, software: str) -> list: # DEBUG: print(f"DEBUG: Fetching peers from '{domain}',software='{software}' ...") peers = list() try: - response = network.fetch_response(domain, "/api/v1/instance/peers", api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) + response = network.fetch_response(domain, "/api/v1/instance/peers", network.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) data = json_from_response(response) # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'") if not response.ok or response.status_code >= 400: # DEBUG: print(f"DEBUG: Was not able to fetch peers, trying alternative ...") - response = network.fetch_response(domain, "/api/v3/site", api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) + response = network.fetch_response(domain, "/api/v3/site", network.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) data = json_from_response(response) # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'") @@ -446,7 +435,7 @@ def fetch_nodeinfo(domain: str, path: str = None) -> list: try: # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...") - response = network.fetch_response(domain, request, api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))) + response = network.fetch_response(domain, request, network.api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))) data = json_from_response(response) # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'") @@ -482,7 +471,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: data = {} try: - response = network.fetch_response(domain, "/.well-known/nodeinfo", api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))) + response = network.fetch_response(domain, "/.well-known/nodeinfo", network.api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))) data = json_from_response(response) # DEBUG: print("DEBUG: domain,response.ok,data[]:", domain, response.ok, type(data)) @@ -495,7 +484,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"]) if link["rel"] in nodeinfo_identifier: # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"]) - response = fetch_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) + response = fetch_url(link["href"], network.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) data = json_from_response(response) # DEBUG: print("DEBUG: href,response.ok,response.status_code:", link["href"], response.ok, response.status_code) @@ -533,7 +522,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: try: # DEBUG: print(f"DEBUG: Fetching path='{path}' from '{domain}' ...") - response = network.fetch_response(domain, path, headers, (config.get("connection_timeout"), config.get("read_timeout"))) + response = network.fetch_response(domain, path, network.headers, (config.get("connection_timeout"), config.get("read_timeout"))) # DEBUG: print("DEBUG: domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text)) if response.ok and response.status_code < 300 and len(response.text) > 0: diff --git a/fba/federation/lemmy.py b/fba/federation/lemmy.py index 7e3c513..f578abf 100644 --- a/fba/federation/lemmy.py +++ b/fba/federation/lemmy.py @@ -29,7 +29,7 @@ def fetch_peers(domain: str) -> list: peers = list() try: # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...") - response = network.fetch_response(domain, "/api/v3/site", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) + response = network.fetch_response(domain, "/api/v3/site", network.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) data = fba.json_from_response(response) diff --git a/fba/federation/mastodon.py b/fba/federation/mastodon.py index 3106065..b2213a5 100644 --- a/fba/federation/mastodon.py +++ b/fba/federation/mastodon.py @@ -68,7 +68,7 @@ def fetch_blocks_from_about(domain: str) -> dict: try: doc = bs4.BeautifulSoup( - network.fetch_response(domain, "/about/more", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, + network.fetch_response(domain, "/about/more", network.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, "html.parser", ) except BaseException as exception: @@ -135,16 +135,16 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # handling CSRF, I've saw at least one server requiring it to access the endpoint # DEBUG: print("DEBUG: Fetching meta:", domain) meta = bs4.BeautifulSoup( - network.fetch_response(domain, "/", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, + network.fetch_response(domain, "/", network.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, "html.parser", ) try: csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] # DEBUG: print("DEBUG: Adding CSRF token:", domain, csrf) - reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} + reqheaders = {**network.api_headers, **{"X-CSRF-Token": csrf}} except BaseException as exception: # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", domain, exception) - reqheaders = fba.api_headers + reqheaders = network.api_headers # DEBUG: print("DEBUG: Querying API domain_blocks:", domain) blocklist = network.fetch_response(domain, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json() diff --git a/fba/federation/peertube.py b/fba/federation/peertube.py index 4c9c782..cc9dd7a 100644 --- a/fba/federation/peertube.py +++ b/fba/federation/peertube.py @@ -33,7 +33,7 @@ def fetch_peers(domain: str) -> list: # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'") while True: try: - response = network.fetch_response(domain, "/api/v1/server/{mode}?start={start}&count=100", headers, (config.get("connection_timeout"), config.get("read_timeout"))) + response = network.fetch_response(domain, "/api/v1/server/{mode}?start={start}&count=100", network.headers, (config.get("connection_timeout"), config.get("read_timeout"))) data = fba.json_from_response(response) # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'") diff --git a/fba/network.py b/fba/network.py index 8d64144..d932221 100644 --- a/fba/network.py +++ b/fba/network.py @@ -19,8 +19,20 @@ import reqto import requests from fba import config +from fba import fba from fba import instances +# HTTP headers for non-API requests +headers = { + "User-Agent": config.get("useragent"), +} + +# HTTP headers for API requests +api_headers = { + "User-Agent" : config.get("useragent"), + "Content-Type": "application/json", +} + def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = {}) -> dict: # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',parameter='{parameter}',extra_headers()={len(extra_headers)} - CALLED!") if not isinstance(domain, str): @@ -44,7 +56,7 @@ def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = timeout=(config.get("connection_timeout"), config.get("read_timeout")) ) - data = json_from_response(response) + data = fba.json_from_response(response) # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'") if not response.ok or response.status_code >= 400: print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',parameter()={len(parameter)},response.status_code='{response.status_code}',data[]='{type(data)}'") -- 2.39.5