From 54eb2cd65bb2146a4a85b97c8b8d871dbbdbd462 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Thu, 19 Sep 2024 00:37:32 +0200 Subject: [PATCH] Continued: - introduced network.fetch_json_rows() which fetches a JSON API response and returns its rows, optionally taken from a specifiable rows key --- fba/commands.py | 67 ++++++++++--------------------------- fba/http/federation.py | 43 ++++++------------------ fba/http/network.py | 50 ++++++++++++++++++++++++++++ fba/networks/peertube.py | 71 +++++++++++++++++----------------------- 4 files changed, 107 insertions(+), 124 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index 3c20a97..821f9f4 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -130,22 +130,13 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int: try: logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers)) - fetched = network.get_json_api( + rows = network.fetch_json_rows( source_domain, "/api/v1/servers/all.json?scope=All&country=all&language=all", headers, - (config.get("connection_timeout"), config.get("read_timeout")) + "data" ) - logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched)) - if "error_message" in fetched: - logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"]) - return 101 - elif "data" not in fetched["json"]: - logger.warning("API did not return JSON with 'data' element - EXIT!") - return 102 - - rows = fetched["json"]["data"] logger.info("Checking %d fetched rows ...", len(rows)) for row in rows: logger.debug("row[]='%s'", type(row)) @@ -1640,34 +1631,23 @@ def fetch_instances_social(args: argparse.Namespace) -> int: logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain) sources.update(source_domain) - headers = { - "Authorization": f"Bearer {config.get('instances_social_api_key')}", - } - logger.info("Fetching list from source_domain='%s' ...", source_domain) - fetched = network.get_json_api( + rows 
= network.fetch_json_rows( source_domain, - "/api/1.0/instances/list?count=0&sort_by=name", - headers=headers, - timeout=(config.get("connection_timeout"), config.get("read_timeout")) + "/api/1.0/instances/list?count=0&sort_by=name", + { + "Authorization": f"Bearer {config.get('instances_social_api_key')}", + }, + "instances" ) - logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched)) - if "error_message" in fetched: - logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"]) - return 2 - elif "exception" in fetched: - logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"])) - return 3 - elif "json" not in fetched: - logger.warning("fetched has no element 'json' - EXIT!") - return 4 - elif "instances" not in fetched["json"]: - logger.warning("fetched[row] has no element 'instances' - EXIT!") - return 5 - - domains = list() - rows = fetched["json"]["instances"] + logger.info("Fetching list from source_domain='%s' ...", source_domain) + rows = network.fetch_json_rows( + domain, + path, + headers, + "instances" + ) logger.info("Checking %d row(s) ...", len(rows)) for row in rows: @@ -1718,23 +1698,10 @@ def fetch_relaylist(args: argparse.Namespace) -> int: sources.update(source_domain) logger.info("Fetching list from source_domain='%s' ...", source_domain) - fetched = network.get_json_api( + rows = network.fetch_json_rows( source_domain, - "/relays", - {}, - (config.get("connection_timeout"), config.get("read_timeout")) + "/relays" ) - logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched)) - - if "error_message" in fetched: - logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"]) - return 2 - elif "exception" in fetched: - logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"])) - return 3 - elif "json" not in fetched: - logger.warning("fetched has no element 'json' - EXIT!") - return 4 domains = list() 
diff --git a/fba/http/federation.py b/fba/http/federation.py index 544796e..b0fc359 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -261,18 +261,14 @@ def fetch_peers(domain: str, software: str, origin: str) -> list: logger.debug("Checking %d API paths ...", len(_api_paths)) for path in _api_paths: logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software) - data = network.get_json_api( + peers = network.fetch_json_rows( domain, path, - headers=headers, - timeout=(config.get("connection_timeout"), config.get("read_timeout")) + headers ) - logger.debug("data(%d)[]='%s'", len(data), type(data)) - if "error_message" in data: - logger.debug("Was not able to fetch peers from path='%s',domain='%s' ...", path, domain) - instances.set_last_error(domain, data) - elif "json" in data and len(data["json"]) > 0: + logger.debug("peers()=%d", len(peers)) + if len(peers) > 0: logger.debug("Querying API path='%s' was successful: domain='%s',data[json][%s]()=%d", path, domain, type(data['json']), len(data['json'])) peers = data["json"] @@ -634,7 +630,7 @@ def fetch_blocks(domain: str) -> list: blocklist = list() # No CSRF by default, you don't have to add network.api_headers by yourself here - headers = tuple() + headers = dict() try: logger.debug("Checking CSRF for domain='%s'", domain) @@ -649,35 +645,16 @@ def fetch_blocks(domain: str) -> list: try: # json endpoint for newer mastodongs logger.info("Fetching domain_blocks from domain='%s' ...", domain) - data = network.get_json_api( + rows = network.fetch_json_rows( domain, "/api/v1/instance/domain_blocks", - headers=headers, - timeout=(config.get("connection_timeout"), config.get("read_timeout")) + headers ) - rows = list() - - logger.debug("data(%d)[]='%s'", len(data), type(data)) - if "error_message" in data: - logger.debug("Was not able to fetch domain_blocks from domain='%s': status_code=%d,error_message='%s'", domain, data['status_code'], data['error_message']) - 
instances.set_last_error(domain, data) - logger.debug("blocklist()=%d - EXIT!", len(blocklist)) - return blocklist - elif "json" in data and "error" in data["json"]: - logger.warning("JSON API returned error message: '%s'", data["json"]["error"]) - instances.set_last_error(domain, data) - - logger.debug("blocklist()=%d - EXIT!", len(blocklist)) - return blocklist - else: - # Getting blocklist - rows = data["json"] - - logger.debug("Marking domain='%s' as successfully handled ...", domain) - instances.set_success(domain) + logger.debug("Marking domain='%s' as successfully handled ...", domain) + instances.set_success(domain) - logger.debug("rows(%d)[]='%s'", len(rows), type(rows)) + logger.debug("rows()=%d", len(rows)) if len(rows) > 0: logger.debug("Checking %d entries from domain='%s' ...", len(rows), domain) for block in rows: diff --git a/fba/http/network.py b/fba/http/network.py index b823eaa..c982f61 100644 --- a/fba/http/network.py +++ b/fba/http/network.py @@ -349,3 +349,53 @@ def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Respon logger.debug("response[]='%s' - EXIT!", type(response)) return response + +def fetch_json_rows(hostname: str, path: str, headers: dict = {}, rows_key: str = None): + logger.debug("hostname='%s',path='%s',headers()=%d,rows_key='%s' - CALLED!", hostname, path, len(headers), rows_key) + if not isinstance(hostname, str): + raise ValueError(f"hostname[]='{type(hostname)}' is not of type 'str'") + elif hostname == "": + raise ValueError("Parameter 'hostname' is an empty string") + elif not validators.hostname(hostname): + raise ValueError(f"hostname='{hostname}' is not a valid hostname") + elif not isinstance(path, str): + raise ValueError(f"path[]='{type(path)}' is not of type 'str'") + elif path == "": + raise ValueError("Parameter 'path' is an empty string") + elif not path.startswith("/"): + raise ValueError(f"path='{path}' does not start with a slash") + elif not isinstance(headers, dict): + raise 
ValueError(f"headers[]='{type(headers)}' is not of type 'dict'") + elif not isinstance(rows_key, str) and rows_key is not None: + raise ValueError(f"rows_key[]='{type(rows_key)}' is not of type 'str'") + elif rows_key is not None and rows_key == "": + raise ValueError("Parameter 'rows_key' is an empty string") + + logger.info("Fetching list from hostname='%s',path='%s' ...", hostname, path) + fetched = get_json_api( + hostname, + path, + headers=headers, + timeout=(config.get("connection_timeout"), config.get("read_timeout")) + ) + logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched)) + + if "error_message" in fetched: + logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"]) + return list() + elif "exception" in fetched: + logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"])) + return list() + elif "json" not in fetched: + raise KeyError("fetched has no element 'json'") + elif rows_key is not None and rows_key not in fetched["json"]: + raise KeyError(f"fetched[row] has no element '{rows_key}'") + elif rows_key is None: + logger.debug("Parameter 'rows_key' is not set, using whole fetched['json'] as rows ...") + rows = fetched["json"] + else: + logger.debug("Setting rows to fetched[json][%s]()=%d ...", rows_key, len(fetched["json"][rows_key])) + rows = fetched["json"][rows_key] + + logger.debug("rows()=%d - EXIT!", len(rows)) + return rows diff --git a/fba/networks/peertube.py b/fba/networks/peertube.py index 6eddfec..80437a1 100644 --- a/fba/networks/peertube.py +++ b/fba/networks/peertube.py @@ -55,53 +55,42 @@ def fetch_peers(domain: str) -> list: for mode in ["followers", "following"]: logger.debug("domain='%s',mode='%s'", domain, mode) while True: - data = network.get_json_api( + rows = network.fetch_json_rows( domain, f"/api/v1/server/{mode}?start={start}&count=100", headers, - (config.get("connection_timeout"), config.get("read_timeout")) + "data" ) - logger.debug("data[]='%s'", 
type(data)) - if "error_message" in data: - logger.warning("domain='%s' causes error during API query: '%s' - SKIPPED!", domain, data["error_message"]) + instances.set_success(domain) + + logger.debug("Found %d record(s).", len(rows)) + for record in rows: + logger.debug("record()=%d", len(record)) + for mode2 in ["follower", "following"]: + logger.debug("mode=%s,mode2='%s'", mode, mode2) + if mode2 not in record: + logger.debug("Array record does not contain element mode2='%s' - SKIPPED!", mode2) + continue + elif "host" not in record[mode2]: + logger.debug("record[%s] does not contain element 'host' - SKIPPED!", mode2) + continue + elif record[mode2]["host"] == domain: + logger.debug("record[%s]='%s' matches domain='%s' - SKIPPED!", mode2, record[mode2]["host"], domain) + continue + elif not domain_helper.is_wanted(record[mode2]["host"]): + logger.debug("record[%s][host]='%s' is not wanted - SKIPPED!", mode2, record[mode2]["host"]) + continue + + logger.debug("Appending mode2='%s',host='%s' ...", mode2, record[mode2]["host"]) + peers.append(record[mode2]["host"]) + + if len(rows) < 100: + logger.debug("Reached end of JSON response, domain='%s'", domain) break - elif "data" not in data["json"]: - logger.warning("domain='%s' has no 'data' element returned - SKIPPED!", domain) - break - else: - logger.debug("Success, data[json]()=%d", len(data["json"])) - instances.set_success(domain) - - rows = data["json"]["data"] - - logger.debug("Found %d record(s).", len(rows)) - for record in rows: - logger.debug("record()=%d", len(record)) - for mode2 in ["follower", "following"]: - logger.debug("mode=%s,mode2='%s'", mode, mode2) - if mode2 not in record: - logger.debug("Array record does not contain element mode2='%s' - SKIPPED!", mode2) - continue - elif "host" not in record[mode2]: - logger.debug("record[%s] does not contain element 'host' - SKIPPED!", mode2) - continue - elif record[mode2]["host"] == domain: - logger.debug("record[%s]='%s' matches domain='%s' - SKIPPED!", 
mode2, record[mode2]["host"], domain) - continue - elif not domain_helper.is_wanted(record[mode2]["host"]): - logger.debug("record[%s][host]='%s' is not wanted - SKIPPED!", mode2, record[mode2]["host"]) - continue - - logger.debug("Appending mode2='%s',host='%s' ...", mode2, record[mode2]["host"]) - peers.append(record[mode2]["host"]) - - if len(rows) < 100: - logger.debug("Reached end of JSON response, domain='%s'", domain) - break - - # Continue with next row - start = start + 100 + + # Continue with next row + start = start + 100 logger.debug("peers[%s]()=%d - EXIT!", type(peers), len(peers)) return peers -- 2.39.5