From bc499c7a65d742c362e01184e2f679d9ded7039e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sun, 28 May 2023 15:19:41 +0200 Subject: [PATCH] Continued: - 100 rows should work! (the fail-safe check "fetched versus expected" will kick in here) - also read origin and pass over 'origin' during fetching instances --- config.defaults.json | 2 +- fba.py | 99 ++++++++++++++++++++++++-------------------- fetch_instances.py | 6 +-- 3 files changed, 59 insertions(+), 48 deletions(-) diff --git a/config.defaults.json b/config.defaults.json index c5787e6..ca48eef 100644 --- a/config.defaults.json +++ b/config.defaults.json @@ -12,5 +12,5 @@ "slogan" : "### Your footer slogan ###", "recheck_instance" : 3600, "recheck_block" : 3600, - "misskey_offset" : 10 + "misskey_offset" : 100 } diff --git a/fba.py b/fba.py index 5ea42a0..f922580 100644 --- a/fba.py +++ b/fba.py @@ -354,10 +354,14 @@ def get_peers(domain: str, software: str) -> list: if software == "misskey": # DEBUG: print(f"DEBUG: domain='{domain}' is misskey, sending API POST request ...") - counter = 0 + offset = 0 step = config["misskey_offset"] + # iterating through all "suspended" (follow-only in its terminology) + # instances page-by-page, since that troonware doesn't support + # sending them all at once while True: - if counter == 0: + # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...") + if offset == 0: fetched = post_json_api(domain, "/api/federation/instances", json.dumps({ "sort" : "+pubAt", "host" : None, @@ -368,23 +372,30 @@ def get_peers(domain: str, software: str) -> list: "sort" : "+pubAt", "host" : None, "limit" : step, - "offset": counter - 1 + "offset": offset - 1 }), {"Origin": domain}) # DEBUG: print("DEBUG: fetched():", len(fetched)) if len(fetched) == 0: # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) break - - # DEBUG: print("DEBUG: Raising counter by step:", step) - counter = counter + step + elif len(fetched) != config["misskey_offset"]: + print(f"WARNING: Fetched '{len(fetched)}' row(s) but expected: '{config['misskey_offset']}'") + offset = offset + (config["misskey_offset"] - len(fetched)) + else: + # DEBUG: print("DEBUG: Raising offset by step:", step) + offset = offset + step # Check records for row in fetched: # DEBUG: print(f"DEBUG: row():{len(row)}") - if "host" in row: + if "host" in row and is_blacklisted(row["host"]): + print(f"WARNING: row[host]='{row['host']}' is blacklisted. domain='{domain}'") + elif "host" in row: # DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'") peers.append(row["host"]) + else: + print(f"WARNING: row()={len(row)} does not contain element 'host': {row},domain='{domain}'") # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) return peers @@ -498,7 +509,7 @@ def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = data = res.json() except BaseException as e: - print("WARNING: Some error during post():", domain, path, parameter, e) + print(f"WARNING: Some error during post(): domain='{domain},path='{path}',parameter()={len(parameter)},exception:'{e}'") # DEBUG: print("DEBUG: Returning data():", len(data)) return data @@ -507,7 +518,7 @@ def fetch_nodeinfo(domain: str) -> list: # DEBUG: print("DEBUG: Fetching nodeinfo from domain:", domain) nodeinfo = fetch_wellknown_nodeinfo(domain) - # DEBUG: print("DEBUG:nodeinfo:", len(nodeinfo)) + # DEBUG: print("DEBUG: nodeinfo:", len(nodeinfo)) if len(nodeinfo) > 0: # DEBUG: print("DEBUG: Returning auto-discovered nodeinfo:", len(nodeinfo)) @@ -953,6 +964,7 @@ def get_friendica_blocks(domain: str) -> dict: return {} for line in blocklist.find("table").find_all("tr")[1:]: + # DEBUG: print(f"DEBUG: line='{line}'") blocks.append({ "domain": tidyup(line.find_all("td")[0].text), "reason": tidyup(line.find_all("td")[1].text) @@ -970,37 +982,44 @@ def get_misskey_blocks(domain: str) -> dict: "blocked" : [] } - counter = 0 + offset = 0 step = config["misskey_offset"] while True: # iterating through all "suspended" (follow-only in its terminology) # instances page-by-page, since that troonware doesn't support # sending them all at once try: - if counter == 0: - # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain, "/api/federation/instances", json.dumps({ + print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...") + if offset == 0: + # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset) + fetched = post_json_api(domain, "/api/federation/instances", json.dumps({ "sort" : "+pubAt", "host" : None, "suspended": True, "limit" : step }), {"Origin": domain}) else: - # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain, "/api/federation/instances", json.dumps({ + # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset) + fetched = post_json_api(domain, "/api/federation/instances", json.dumps({ "sort" : "+pubAt", "host" : None, "suspended": True, "limit" : step, - "offset" : counter - 1 + "offset" : offset - 1 }), {"Origin": domain}) - # DEBUG: print("DEBUG: doc():", len(doc)) - if len(doc) == 0: + print("DEBUG: fetched():", len(fetched)) + if len(fetched) == 0: # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) break + elif len(fetched) != config["misskey_offset"]: + print(f"WARNING: Fetched '{len(fetched)}' row(s) but expected: '{config['misskey_offset']}'") + offset = offset + (config["misskey_offset"] - len(fetched)) + else: + # DEBUG: print("DEBUG: Raising offset by step:", step) + offset = offset + step - for instance in doc: + for instance in fetched: # just in case if instance["isSuspended"]: blocks["suspended"].append( @@ -1011,63 +1030,55 @@ def get_misskey_blocks(domain: str) -> dict: } ) - if len(doc) < step: - # DEBUG: print("DEBUG: End of request:", len(doc), step) - break - - # DEBUG: print("DEBUG: Raising counter by step:", step) - counter = counter + step - except BaseException as e: print("WARNING: Caught error, exiting loop:", domain, e) update_last_error(domain, e) - counter = 0 + offset = 0 break while True: # same shit, different asshole ("blocked" aka full suspend) try: - if counter == 0: - # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain,"/api/federation/instances", json.dumps({ + if offset == 0: + # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset) + fetched = post_json_api(domain,"/api/federation/instances", json.dumps({ "sort" : "+pubAt", "host" : None, "blocked": True, "limit" : step }), {"Origin": domain}) else: - # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter) - doc = post_json_api(domain,"/api/federation/instances", json.dumps({ + # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset) + fetched = post_json_api(domain,"/api/federation/instances", json.dumps({ "sort" : "+pubAt", "host" : None, "blocked": True, "limit" : step, - "offset" : counter-1 + "offset" : offset-1 }), {"Origin": domain}) - # DEBUG: print("DEBUG: doc():", len(doc)) - if len(doc) == 0: + print("DEBUG: fetched():", len(fetched)) + if len(fetched) == 0: # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) break + elif len(fetched) != config["misskey_offset"]: + print(f"WARNING: Fetched '{len(fetched)}' row(s) but expected: '{config['misskey_offset']}'") + offset = offset + (config["misskey_offset"] - len(fetched)) + else: + # DEBUG: print("DEBUG: Raising offset by step:", step) + offset = offset + step - for instance in doc: + for instance in fetched: if instance["isBlocked"]: blocks["blocked"].append({ "domain": tidyup(instance["host"]), "reason": None }) - if len(doc) < step: - # DEBUG: print("DEBUG: End of request:", len(doc), step) - break - - # DEBUG: print("DEBUG: Raising counter by step:", step) - counter = counter + step - except BaseException as e: print("ERROR: Exception during POST:", domain, e) update_last_error(domain, e) - counter = 0 + offset = 0 break # DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"])) diff --git a/fetch_instances.py b/fetch_instances.py index a53ef22..fbd113a 100644 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -53,7 +53,7 @@ fetch_instances(instance, None, None) # Loop through some instances fba.cursor.execute( - "SELECT domain,software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_nodeinfo IS NULL OR last_nodeinfo < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]] + "SELECT domain,origin,software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_nodeinfo IS NULL OR last_nodeinfo < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]] ) rows = fba.cursor.fetchall() @@ -64,7 +64,7 @@ for row in rows: print("WARNING: domain is blacklisted:", row[0]) continue - print("INFO: Fetching instances for instance:", row[0]) - fetch_instances(row[0], None, row[1]) + print(f"INFO: Fetching instances for instance '{row[0]}'('{row[2]}') of origin '{row[1]}'") + fetch_instances(row[0], row[1], row[2]) fba.connection.close() -- 2.39.5