From 254375ae28cf3c229b5d57f976806af2fc3bcd5a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 23 May 2023 20:02:26 +0200 Subject: [PATCH] Continued: - lemmy has an other API URL and also other JSON output for getting more peers - Unfinished: Fetching blocks from Lemmy --- fba.py | 277 +++++++++++++++++++++++++-------------------- fetch_blocks.py | 2 +- fetch_instances.py | 23 ++-- 3 files changed, 164 insertions(+), 138 deletions(-) diff --git a/fba.py b/fba.py index 5de6ba3..fea6af2 100644 --- a/fba.py +++ b/fba.py @@ -122,7 +122,7 @@ def update_nodeinfos(domain: str): print(f"ERROR: failed SQL query: domain='{domain}',sql='{sql}',exception='{e}'") sys.exit(255) - # NOISY-DEBUG: # NOISY-DEBUG: print("DEBUG: Deleting nodeinfos for domain:", domain) + # NOISY-DEBUG: print("DEBUG: Deleting nodeinfos for domain:", domain) for key in nodeinfos: try: # NOISY-DEBUG: print("DEBUG: Deleting key:", key) @@ -133,12 +133,13 @@ def update_nodeinfos(domain: str): # NOISY-DEBUG: print("DEBUG: EXIT!") def update_last_error(domain: str, res: any): - # NOISY-DEBUG: print("DEBUG: domain,res.status_code:", domain, res.status_code, res.reason) + # NOISY-DEBUG: print("DEBUG: domain,res[]:", domain, type(res)) try: - # NOISY-DEBUG: print("DEBUG: res[]:", type(res)) - if isinstance(res, BaseException): + # NOISY-DEBUG: print("DEBUG: BEFORE res[]:", type(res)) + if isinstance(res, BaseException) or isinstance(res, json.JSONDecodeError): res = str(res) + # NOISY-DEBUG: print("DEBUG: AFTER res[]:", type(res)) if type(res) is str: cursor.execute("UPDATE instances SET last_status_code = 999, last_error_details = ?, last_updated = ? WHERE domain = ? 
LIMIT 1", [ res, @@ -184,23 +185,49 @@ def update_last_nodeinfo(domain: str): connection.commit() # NOISY-DEBUG: print("DEBUG: EXIT!") -def get_peers(domain: str) -> list: - # NOISY-DEBUG: print("DEBUG: Getting peers for domain:", domain) +def get_peers(domain: str, software: str) -> list: + # NOISY-DEBUG: print("DEBUG: Getting peers for domain:", domain, software) peers = None + if software == "lemmy": + # NOISY-DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy. fetching JSON ...") + try: + res = reqto.get(f"https://{domain}/api/v3/site", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + + if res.ok and res.json() is not None: + # NOISY-DEBUG: print("DEBUG: Success, res.json():", len(res.json())) + data = res.json() + + if "federated_instances" in data and "linked" in data["federated_instances"]: + # NOISY-DEBUG: print("DEBUG: Found federated_instances", domain) + peers = data["federated_instances"]["linked"] + (data["federated_instances"].get("allowed") or []) + (data["federated_instances"].get("blocked") or []) + + except BaseException as e: + print("WARNING: Exception during fetching JSON:", domain, e) + + # NOISY-DEBUG: print("DEBUG: Returning peers[]:", type(peers)) + return peers + try: - res = reqto.get(f"https://{domain}{get_peers_url}", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + res = reqto.get(f"https://{domain}/api/v1/instance/peers", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) if not res.ok or res.status_code >= 400: - print("WARNING: Cannot fetch peers:", domain) - update_last_error(domain, res) + res = reqto.get(f"https://{domain}/api/v3/site", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) + data = (res.json() or {}) if res.ok else {} + if "federated_instances" in data and "linked" in data["federated_instances"]: + # NOISY-DEBUG: print("DEBUG: Found federated_instances", domain) + peers = data["federated_instances"]["linked"] + (data["federated_instances"].get("allowed") or []) + 
(data["federated_instances"].get("blocked") or []) + else: + print("WARNING: Could not reach any JSON API:", domain) + update_last_error(domain, res) else: # NOISY-DEBUG: print("DEBUG: Querying API was successful:", domain, len(res.json())) peers = res.json() nodeinfos["get_peers_url"][domain] = get_peers_url - except: - print("WARNING: Some error during get():", domain) + except BaseException as e: + print("WARNING: Some error during get():", domain, e) + update_last_error(domain, e) update_last_nodeinfo(domain) @@ -220,8 +247,8 @@ def post_json_api(domain: str, path: str, data: str) -> list: update_last_nodeinfo(domain) json = res.json() - except: - print("WARNING: Some error during post():", domain, path, data) + except BaseException as e: + print("WARNING: Some error during post():", domain, path, data, e) # NOISY-DEBUG: print("DEBUG: Returning json():", len(json)) return json @@ -252,7 +279,7 @@ def fetch_nodeinfo(domain: str) -> list: res = reqto.get(request, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) # NOISY-DEBUG: print("DEBUG: res.ok,res.json[]:", res.ok, type(res.json())) - if res.ok and res.json() is not None: + if res.ok and isinstance(res.json(), dict): # NOISY-DEBUG: print("DEBUG: Success:", request) json = res.json() nodeinfos["detection_mode"][domain] = "STATIC_CHECK" @@ -281,8 +308,8 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: try: res = reqto.get(f"https://{domain}/.well-known/nodeinfo", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) - # NOISY-DEBUG: print("DEBUG: domain,res.ok:", domain, res.ok) - if res.ok and res.json() is not None: + # NOISY-DEBUG: print("DEBUG: domain,res.ok,res.json[]:", domain, res.ok, type(res.json())) + if res.ok and isinstance(res.json(), dict): nodeinfo = res.json() # NOISY-DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain) if "links" in nodeinfo: @@ -293,7 +320,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: # NOISY-DEBUG: print("DEBUG: Fetching 
nodeinfo from:", link["href"]) res = reqto.get(link["href"]) # NOISY-DEBUG: print("DEBUG: href,res.ok,res.status_code:", link["href"], res.ok, res.status_code) - if res.ok and res.json() is not None: + if res.ok and isinstance(res.json(), dict): # NOISY-DEBUG: print("DEBUG: Found JSON nodeinfo():", len(res.json())) json = res.json() nodeinfos["detection_mode"][domain] = "AUTO_DISCOVERY" @@ -387,8 +414,8 @@ def update_block_reason(reason: str, blocker: str, blocked: str, block_level: st if cursor.rowcount == 0: print("WARNING: Did not update any rows:", domain) - except: - print("ERROR: failed SQL query:", reason, blocker, blocked, block_level) + except BaseException as e: + print("ERROR: failed SQL query:", reason, blocker, blocked, block_level, e) sys.exit(255) def update_last_seen(blocker: str, blocked: str, block_level: str): @@ -407,8 +434,8 @@ def update_last_seen(blocker: str, blocked: str, block_level: str): if cursor.rowcount == 0: print("WARNING: Did not update any rows:", domain) - except: - print("ERROR: failed SQL query:", last_seen, blocker, blocked, block_level) + except BaseException as e: + print("ERROR: failed SQL query:", last_seen, blocker, blocked, block_level, e) sys.exit(255) # NOISY-DEBUG: print("DEBUG: EXIT!") @@ -436,8 +463,8 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str): ), ) - except: - print("ERROR: failed SQL query:", blocker, blocked, reason, block_level, first_added, last_seen) + except BaseException as e: + print("ERROR: failed SQL query:", blocker, blocked, reason, block_level, first_added, last_seen, e) sys.exit(255) # NOISY-DEBUG: print("DEBUG: EXIT!") @@ -469,7 +496,7 @@ def add_instance(domain: str, origin: str, originator: str): ) if domain in nodeinfos["nodeinfo_url"]: - # NOISY-DEBUG print("DEBUG: domain has pending nodeinfo being updated:", domain) + # NOISY-DEBUG: print("DEBUG: domain has pending nodeinfo being updated:", domain) update_nodeinfos(domain) if domain in pending_errors: @@ 
-546,8 +573,9 @@ def get_mastodon_blocks(domain: str) -> dict: reqto.get(f"https://{domain}/about/more", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])).text, "html.parser", ) - except: - print("ERROR: Cannot fetch from domain:", domain) + except BaseException as e: + print("ERROR: Cannot fetch from domain:", domain, e) + update_last_error(domain, e) return {} for header in doc.find_all("h3"): @@ -583,8 +611,9 @@ def get_friendica_blocks(domain: str) -> dict: reqto.get(f"https://{domain}/friendica", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])).text, "html.parser", ) - except: - print("WARNING: Failed to fetch /friendica from domain:", domain) + except BaseException as e: + print("WARNING: Failed to fetch /friendica from domain:", domain, e) + update_last_error(domain, e) return {} blocklist = doc.find(id="about_blocklist") @@ -612,113 +641,111 @@ def get_misskey_blocks(domain: str) -> dict: "blocked" : [] } - try: - counter = 0 - step = 99 - while True: - # iterating through all "suspended" (follow-only in its terminology) - # instances page-by-page, since that troonware doesn't support - # sending them all at once - try: - if counter == 0: - # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain, "/api/federation/instances/", json.dumps({ - "sort" : "+caughtAt", - "host" : None, - "suspended": True, - "limit" : step - })) - else: - # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain, "/api/federation/instances/", json.dumps({ - "sort" : "+caughtAt", - "host" : None, - "suspended": True, - "limit" : step, - "offset" : counter-1 - })) - - # NOISY-DEBUG: print("DEBUG: doc():", len(doc)) - if len(doc) == 0: - # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) - break - - for instance in doc: - # just in case - if 
instance["isSuspended"]: - blocks["suspended"].append( - { - "domain": tidyup(instance["host"]), - # no reason field, nothing - "reason": "" - } - ) - - if len(doc) < step: - # NOISY-DEBUG: print("DEBUG: End of request:", len(doc), step) - break - - # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step) - counter = counter + step - - except: - print("WARNING: Caught error, exiting loop:", domain) - counter = 0 + counter = 0 + step = 99 + while True: + # iterating through all "suspended" (follow-only in its terminology) + # instances page-by-page, since that troonware doesn't support + # sending them all at once + try: + if counter == 0: + # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) + doc = post_json_api(domain, "/api/federation/instances/", json.dumps({ + "sort" : "+caughtAt", + "host" : None, + "suspended": True, + "limit" : step + })) + else: + # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) + doc = post_json_api(domain, "/api/federation/instances/", json.dumps({ + "sort" : "+caughtAt", + "host" : None, + "suspended": True, + "limit" : step, + "offset" : counter-1 + })) + + # NOISY-DEBUG: print("DEBUG: doc():", len(doc)) + if len(doc) == 0: + # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) break - while True: - # same shit, different asshole ("blocked" aka full suspend) - try: - if counter == 0: - # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain,"/api/federation/instances", json.dumps({ - "sort" : "+caughtAt", - "host" : None, - "blocked": True, - "limit" : step - })) - else: - # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain,"/api/federation/instances", json.dumps({ - "sort" : "+caughtAt", - "host" : None, - "blocked": True, - "limit" : step, - "offset" : 
counter-1 - })) - - # NOISY-DEBUG: print("DEBUG: doc():", len(doc)) - if len(doc) == 0: - # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) - break - - for instance in doc: - if instance["isBlocked"]: - blocks["blocked"].append({ + for instance in doc: + # just in case + if instance["isSuspended"]: + blocks["suspended"].append( + { "domain": tidyup(instance["host"]), + # no reason field, nothing "reason": "" - }) + } + ) + + if len(doc) < step: + # NOISY-DEBUG: print("DEBUG: End of request:", len(doc), step) + break + + # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step) + counter = counter + step + + except BaseException as e: + print("WARNING: Caught error, exiting loop:", domain, e) + update_last_error(domain, e) + counter = 0 + break - if len(doc) < step: - # NOISY-DEBUG: print("DEBUG: End of request:", len(doc), step) - break + while True: + # same shit, different asshole ("blocked" aka full suspend) + try: + if counter == 0: + # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) + doc = post_json_api(domain,"/api/federation/instances", json.dumps({ + "sort" : "+caughtAt", + "host" : None, + "blocked": True, + "limit" : step + })) + else: + # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) + doc = post_json_api(domain,"/api/federation/instances", json.dumps({ + "sort" : "+caughtAt", + "host" : None, + "blocked": True, + "limit" : step, + "offset" : counter-1 + })) + + # NOISY-DEBUG: print("DEBUG: doc():", len(doc)) + if len(doc) == 0: + # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) + break - # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step) - counter = counter + step + for instance in doc: + if instance["isBlocked"]: + blocks["blocked"].append({ + "domain": tidyup(instance["host"]), + "reason": "" + }) - except: - counter = 0 + if len(doc) < step: + # NOISY-DEBUG: print("DEBUG: End of 
request:", len(doc), step) break - # NOISY-DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"])) - return { - "reject" : blocks["blocked"], - "followers_only": blocks["suspended"] - } + # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step) + counter = counter + step - except: - print("WARNING: API request failed for domain:", domain) - return {} + except BaseException as e: + print("ERROR: Exception during POST:", domain, e) + update_last_error(domain, e) + counter = 0 + break + + # NOISY-DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"])) + return { + "reject" : blocks["blocked"], + "followers_only": blocks["suspended"] + } def tidyup(string: str) -> str: # some retards put their blocks in variable case diff --git a/fetch_blocks.py b/fetch_blocks.py index a5aa9ea..4b4aba4 100644 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -360,7 +360,7 @@ for blocker, software in fba.cursor.fetchall(): print("INFO: blocker:", blocker) try: # Blocks - federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() + federation = reqto.get(f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() if (federation == None): print("WARNING: No valid response:", blocker); diff --git a/fetch_instances.py b/fetch_instances.py index 9aab60c..285e8e3 100644 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -4,8 +4,8 @@ import json import time import fba -def fetch_instances(domain: str, origin: str): - # NOISY-DEBUG: print("DEBUG: domain,origin:", domain, origin) +def fetch_instances(domain: str, origin: str, software: str): + # NOISY-DEBUG: print("DEBUG: domain,origin,software:", domain, origin, software) fba.cursor.execute( 
"SELECT domain FROM instances WHERE domain = ? LIMIT 1", [domain] ) @@ -14,8 +14,8 @@ def fetch_instances(domain: str, origin: str): # NOISY-DEBUG: print("DEBUG: Adding new domain:", domain, origin) fba.add_instance(domain, origin, sys.argv[0]) - # NOISY-DEBUG: print("DEBUG: Fetching instances for domain:", domain, origin) - peerlist = fba.get_peers(domain) + # NOISY-DEBUG: print("DEBUG: Fetching instances for domain:", domain, software) + peerlist = fba.get_peers(domain, software) if (peerlist is None): print("ERROR: Cannot fetch peers:", domain) @@ -55,21 +55,20 @@ def fetch_instances(domain: str, origin: str): instance = sys.argv[1] # Initial fetch -fetch_instances(instance, None) +fetch_instances(instance, None, None) # Loop through some instances fba.cursor.execute( - "SELECT domain FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_nodeinfo IS NULL OR last_nodeinfo < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]] + "SELECT domain,software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_nodeinfo IS NULL OR last_nodeinfo < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]] ) for row in fba.cursor.fetchall(): - domain = row[0] - # NOISY-DEBUG: print("DEBUG: domain:", domain) - if fba.is_blacklisted(domain): - print("WARNING: domain is blacklisted:", domain) + # NOISY-DEBUG: print("DEBUG: domain:", row[0]) + if fba.is_blacklisted(row[0]): + print("WARNING: domain is blacklisted:", row[0]) continue - print("INFO: Fetching instances for instance:", domain) - fetch_instances(domain, None) + print("INFO: Fetching instances for instance:", row[0]) + fetch_instances(row[0], None, row[1]) fba.connection.close() -- 2.39.5