From e50bba4516fa08b89991b5703accf3b499097bb2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sun, 28 May 2023 14:06:53 +0200 Subject: [PATCH] Continued: - encapsulated into function add_peers() - need to add "Content-Type: application/json" for API requests, thanks to Kromonos - introduced 'api_headers' for JSON API requests --- fba.py | 108 +++++++++++++++++++++++++++--------------------- fetch_blocks.py | 6 +-- 2 files changed, 65 insertions(+), 49 deletions(-) diff --git a/fba.py b/fba.py index 82bf029..5ea42a0 100644 --- a/fba.py +++ b/fba.py @@ -43,9 +43,14 @@ nodeinfo_identifier = [ "http://nodeinfo.diaspora.software/ns/schema/1.0", ] -# HTTP headers for requests +# HTTP headers for non-API requests headers = { - "user-agent": config["useragent"], + "User-Agent": config["useragent"], +} +# HTTP headers for API requests +api_headers = { + "User-Agent": config["useragent"], + "Content-Type": "application/json", } # Found info from node, such as nodeinfo URL, detection mode that needs to be @@ -105,6 +110,18 @@ patterns = [ re.compile("^[a-f0-9]{7}$"), ] +def add_peers(rows: dict) -> dict: + # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!") + peers = {} + for element in ["linked", "allowed", "blocked"]: + # DEBUG: print(f"DEBUG: Checking element='{element}'") + if element in rows and rows[element] != None: + # DEBUG: print(f"DEBUG: Adding {len(rows[element])} peer(s) to peers list ...") + peers = {**peers, **rows[element]} + + # DEBUG: print(f"DEBUG: peers()={len(peers)} - CALLED!") + return peers + def remove_version(software: str) -> str: # DEBUG: print(f"DEBUG: software='{software}' - CALLED!") if not "." 
in software and " " not in software: @@ -342,23 +359,26 @@ def get_peers(domain: str, software: str) -> list: while True: if counter == 0: fetched = post_json_api(domain, "/api/federation/instances", json.dumps({ - "sort" : "+caughtAt", + "sort" : "+pubAt", "host" : None, "limit": step - })) + }), {"Origin": domain}) else: fetched = post_json_api(domain, "/api/federation/instances", json.dumps({ - "sort" : "+caughtAt", + "sort" : "+pubAt", "host" : None, "limit" : step, "offset": counter - 1 - })) + }), {"Origin": domain}) # DEBUG: print("DEBUG: fetched():", len(fetched)) if len(fetched) == 0: # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) break + # DEBUG: print("DEBUG: Raising counter by step:", step) + counter = counter + step + # Check records for row in fetched: # DEBUG: print(f"DEBUG: row():{len(row)}") @@ -371,19 +391,21 @@ def get_peers(domain: str, software: str) -> list: elif software == "lemmy": # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...") try: - res = reqto.get(f"https://{domain}/api/v3/site", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + res = reqto.get(f"https://{domain}/api/v3/site", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"])) # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code='{res.status_code}'") - if res.ok and isinstance(res.json(), dict): - # DEBUG: print("DEBUG: Success, res.json():", len(res.json())) - data = res.json() - - if "federated_instances" in data and "linked" in data["federated_instances"]: - # DEBUG: print("DEBUG: Found federated_instances", domain) - peers = data["federated_instances"]["linked"] + data["federated_instances"]["allowed"] + data["federated_instances"]["blocked"] + if not res.ok or res.status_code >= 400: + print("WARNING: Could not reach any JSON API:", domain) + update_last_error(domain, res) + elif "federated_instances" in res.json(): + # DEBUG: print("DEBUG: Found federated_instances", 
domain) + peers = peers + add_peers(res.json()["federated_instances"]) + else: + print("WARNING: JSON response does not contain 'federated_instances':", domain) + update_last_error(domain, res) except BaseException as e: - print("WARNING: Exception during fetching JSON:", domain, e) + print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception:'{e}'") update_last_nodeinfo(domain) @@ -397,7 +419,7 @@ def get_peers(domain: str, software: str) -> list: # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'") while True: try: - res = reqto.get(f"https://{domain}/api/v1/server/{mode}?start={start}&count=100", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + res = reqto.get(f"https://{domain}/api/v1/server/{mode}?start={start}&count=100", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"])) # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code='{res.status_code}'") if res.ok and isinstance(res.json(), dict): @@ -422,7 +444,7 @@ def get_peers(domain: str, software: str) -> list: start = start + 100 except BaseException as e: - print("WARNING: Exception during fetching JSON:", domain, e) + print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception:'{e}'") update_last_nodeinfo(domain) @@ -431,25 +453,19 @@ def get_peers(domain: str, software: str) -> list: # DEBUG: print(f"DEBUG: Fetching get_peers_url='{get_peers_url}' from '{domain}' ...") try: - res = reqto.get(f"https://{domain}{get_peers_url}", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + res = reqto.get(f"https://{domain}{get_peers_url}", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"])) # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code}") if not res.ok or res.status_code >= 400: # DEBUG: print(f"DEBUG: Was not able to fetch '{get_peers_url}', trying alternative ...") - res = reqto.get(f"https://{domain}/api/v3/site", 
headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + res = reqto.get(f"https://{domain}/api/v3/site", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"])) if not res.ok or res.status_code >= 400: print("WARNING: Could not reach any JSON API:", domain) update_last_error(domain, res) - elif "federated_instances" in res.json() and "linked" in res.json()["federated_instances"]: + elif "federated_instances" in res.json(): # DEBUG: print("DEBUG: Found federated_instances", domain) - data = res.json() - - for element in ["linked", "allowed", "blocked"]: - # DEBUG: print(f"DEBUG: Checking element='{element}'") - if element in data["federated_instances"] and data["federated_instances"][element] != None: - print(f"DEBUG Adding {len(data['federated_instances'][element])} peer(s) to peers list ...") - peers = peers + data["federated_instances"][element] + peers = peers + add_peers(res.json()["federated_instances"]) else: print("WARNING: JSON response does not contain 'federated_instances':", domain) update_last_error(domain, res) @@ -467,11 +483,11 @@ def get_peers(domain: str, software: str) -> list: # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) return peers -def post_json_api(domain: str, path: str, parameter: str) -> list: - # DEBUG: print("DEBUG: Sending POST to domain,path,parameter:", domain, path, parameter) +def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = {}) -> list: + # DEBUG: print("DEBUG: Sending POST to domain,path,parameter:", domain, path, parameter, extra_headers) data = {} try: - res = reqto.post(f"https://{domain}{path}", data=parameter, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + res = reqto.post(f"https://{domain}{path}", data=parameter, headers={**api_headers, **extra_headers}, timeout=(config["connection_timeout"], config["read_timeout"])) # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code}") 
if not res.ok or res.status_code >= 400: @@ -510,7 +526,7 @@ def fetch_nodeinfo(domain: str) -> list: for request in requests: try: # DEBUG: print("DEBUG: Fetching request:", request) - res = reqto.get(request, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + res = reqto.get(request, headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"])) # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code}") if res.ok and isinstance(res.json(), dict): @@ -537,7 +553,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: data = {} try: - res = reqto.get(f"https://{domain}/.well-known/nodeinfo", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + res = reqto.get(f"https://{domain}/.well-known/nodeinfo", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"])) # DEBUG: print("DEBUG: domain,res.ok,res.json[]:", domain, res.ok, type(res.json())) if res.ok and isinstance(res.json(), dict): nodeinfo = res.json() @@ -815,9 +831,9 @@ def add_instance(domain: str, origin: str, originator: str): ) for key in nodeinfos: - p# DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',nodeinfos[key]={nodeinfos[key]}") + # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',nodeinfos[key]={nodeinfos[key]}") if domain in nodeinfos[key]: - p# DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...") + # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...") update_nodeinfos(domain) remove_pending_error(domain) break @@ -856,7 +872,7 @@ def send_bot_post(instance: str, blocks: dict): if truncated: message = message + "(the list has been truncated to the first 20 entries)" - botheaders = {**headers, **{"Authorization": "Bearer " + config["bot_token"]}} + botheaders = {**api_headers, **{"Authorization": "Bearer " + config["bot_token"]}} req = reqto.post( f"{config['bot_instance']}/api/v1/statuses", @@ -963,21 +979,21 
@@ def get_misskey_blocks(domain: str) -> dict: try: if counter == 0: # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain, "/api/federation/instances/", json.dumps({ - "sort" : "+caughtAt", + doc = post_json_api(domain, "/api/federation/instances", json.dumps({ + "sort" : "+pubAt", "host" : None, "suspended": True, "limit" : step - })) + }), {"Origin": domain}) else: # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain, "/api/federation/instances/", json.dumps({ - "sort" : "+caughtAt", + doc = post_json_api(domain, "/api/federation/instances", json.dumps({ + "sort" : "+pubAt", "host" : None, "suspended": True, "limit" : step, - "offset" : counter-1 - })) + "offset" : counter - 1 + }), {"Origin": domain}) # DEBUG: print("DEBUG: doc():", len(doc)) if len(doc) == 0: @@ -1014,20 +1030,20 @@ def get_misskey_blocks(domain: str) -> dict: if counter == 0: # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) doc = post_json_api(domain,"/api/federation/instances", json.dumps({ - "sort" : "+caughtAt", + "sort" : "+pubAt", "host" : None, "blocked": True, "limit" : step - })) + }), {"Origin": domain}) else: # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) doc = post_json_api(domain,"/api/federation/instances", json.dumps({ - "sort" : "+caughtAt", + "sort" : "+pubAt", "host" : None, "blocked": True, "limit" : step, "offset" : counter-1 - })) + }), {"Origin": domain}) # DEBUG: print("DEBUG: doc():", len(doc)) if len(doc) == 0: diff --git a/fetch_blocks.py b/fetch_blocks.py index ca6dc1d..5cda828 100644 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -176,10 +176,10 @@ for blocker, software in rows: try: csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf) - reqheaders = 
{**fba.headers, **{"x-csrf-token": csrf}} + reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} except: # NOISY-DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker) - reqheaders = fba.headers + reqheaders = fba.api_headers # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker) blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() @@ -355,7 +355,7 @@ for blocker, software in rows: print("INFO: blocker:", blocker) try: # Blocks - federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() + federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.api_headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() if (federation == None): print("WARNING: No valid response:", blocker); -- 2.39.5