From 00eeda8dc81338255b4a95c53b2f35e492e19349 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 6 Jun 2023 20:43:41 +0200 Subject: [PATCH] WIP: - moved network-specific function to their own fba/network/.py module - renamed blocker -> domain in network-specific functions (it is 'domain' in the end) --- fba/__init__.py | 3 +- fba/commands.py | 317 ++-------------------------------------- fba/fba.py | 251 ++----------------------------- fba/network/__init__.py | 7 + fba/network/lemmy.py | 64 ++++++++ fba/network/mastodon.py | 259 ++++++++++++++++++++++++++++++++ fba/network/misskey.py | 107 ++++++++++++++ fba/network/peertube.py | 68 +++++++++ fba/network/pleroma.py | 203 +++++++++++++++++++++++++ 9 files changed, 735 insertions(+), 544 deletions(-) create mode 100644 fba/network/__init__.py create mode 100644 fba/network/lemmy.py create mode 100644 fba/network/mastodon.py create mode 100644 fba/network/misskey.py create mode 100644 fba/network/peertube.py create mode 100644 fba/network/pleroma.py diff --git a/fba/__init__.py b/fba/__init__.py index 70dc665..0a14a1a 100644 --- a/fba/__init__.py +++ b/fba/__init__.py @@ -4,5 +4,6 @@ __all__ = [ 'commands', 'config', 'fba', - 'instances' + 'instances', + 'network', ] diff --git a/fba/commands.py b/fba/commands.py index 74871ec..ce413ce 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -29,6 +29,7 @@ import validators from fba import boot from fba import config from fba import fba +from fba.network import * def check_instance(args: argparse.Namespace) -> int: # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!") @@ -125,7 +126,7 @@ def fetch_blocks(args: argparse.Namespace): print(f"INFO: Checking {len(rows)} entries ...") for blocker, software, origin, nodeinfo_url in rows: # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) - blockdict = [] + blockdict = list() blocker = fba.tidyup_domain(blocker) # DEBUG: print("DEBUG: 
AFTER blocker,software:", blocker, software) @@ -140,314 +141,18 @@ def fetch_blocks(args: argparse.Namespace): fba.update_last_blocked(blocker) if software == "pleroma": - print("INFO: blocker:", blocker) - try: - # Blocks - json = fba.fetch_nodeinfo(blocker, nodeinfo_url) - if json is None: - print("WARNING: Could not fetch nodeinfo from blocker:", blocker) - continue - elif not "metadata" in json: - print(f"WARNING: json()={len(json)} does not have key 'metadata', blocker='{blocker}'") - continue - elif not "federation" in json["metadata"]: - print(f"WARNING: json()={len(json['metadata'])} does not have key 'federation', blocker='{blocker}'") - continue - - # DEBUG: print("DEBUG: Updating nodeinfo:", blocker) - fba.update_last_nodeinfo(blocker) - - federation = json["metadata"]["federation"] - - if "enabled" in federation: - # DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker) - continue - - if "mrf_simple" in federation: - for block_level, blocks in ( - {**federation["mrf_simple"], - **{"quarantined_instances": federation["quarantined_instances"]}} - ).items(): - # DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) - - if block_level == "": - print("WARNING: block_level is now empty!") - continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for blocked in blocks: - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # -ACK!-oma also started obscuring domains without 
hash - fba.cursor.execute( - "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - # DEBUG: print("DEBUG: searchres[]:", type(searchres)) - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - nodeinfo_url = searchres[1] - # DEBUG: print("DEBUG: Looked up domain:", blocked) - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url) - - if not fba.is_instance_blocked(blocker, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) - fba.block_instance(blocker, blocked, "unknown", block_level) - - if block_level == "reject": - # DEBUG: print("DEBUG: Adding to blockdict:", blocked) - blockdict.append( - { - "blocked": blocked, - "reason" : None - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") - fba.update_last_seen(blocker, blocked, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - - # Reasons - if "mrf_simple_info" in federation: - # DEBUG: print("DEBUG: Found mrf_simple_info:", blocker) - for block_level, info in ( - {**federation["mrf_simple_info"], - **(federation["quarantined_instances_info"] - if "quarantined_instances_info" in federation - else {})} - 
).items(): - # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) - - if block_level == "": - print("WARNING: block_level is now empty!") - continue - - # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for blocked, reason in info.items(): - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # same domain guess as above, but for reasons field - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url) - - # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"]) - fba.update_block_reason(reason["reason"], blocker, blocked, block_level) - - for entry in blockdict: - if entry["blocked"] == blocked: - # DEBUG: print("DEBUG: Updating entry reason:", blocked) - entry["reason"] = reason["reason"] - - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") + print(f"INFO: blocker='{blocker}',software='{software}'") + pleroma.fetch_blocks(blocker, software, origin, nodeinfo_url) elif software == "mastodon": - print("INFO: blocker:", blocker) - try: - # json endpoint for newer mastodongs - try: - json = { - "reject" : [], - "media_removal" : [], - "followers_only": [], - "report_removal": [] - } - - # handling CSRF, I've saw at least one server requiring it to access the endpoint - # DEBUG: print("DEBUG: Fetching meta:", blocker) - meta = bs4.BeautifulSoup( - fba.get_response(blocker, "/", fba.headers, (config.get("connection_timeout"), 
config.get("read_timeout"))).text, - "html.parser", - ) - try: - csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] - # DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf) - reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} - except BaseException as e: - # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker, e) - reqheaders = fba.api_headers - - # DEBUG: print("DEBUG: Querying API domain_blocks:", blocker) - blocks = fba.get_response(blocker, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json() - - print(f"INFO: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}' ...") - for block in blocks: - entry = { - 'domain': block['domain'], - 'hash' : block['digest'], - 'reason': block['comment'] - } - - # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) - if block['severity'] == 'suspend': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['reject'].append(entry) - elif block['severity'] == 'silence': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['followers_only'].append(entry) - elif block['severity'] == 'reject_media': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['media_removal'].append(entry) - elif block['severity'] == 'reject_reports': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['report_removal'].append(entry) - else: - print("WARNING: Unknown severity:", block['severity'], block['domain']) - except BaseException as e: - # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: blocker='{blocker}',exception[{type(e)}]={str(e)}") - json = fba.get_mastodon_blocks(blocker) - - print(f"INFO: Checking {len(json.items())} entries from 
blocker='{blocker}',software='{software}' ...") - for block_level, blocks in json.items(): - # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: AFTER-block_level:", block_level) - if block_level == "": - print("WARNING: block_level is empty, blocker:", blocker) - continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for block in blocks: - blocked, blocked_hash, reason = block.values() - # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER-blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # Doing the hash search for instance names as well to tidy up DB - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? 
LIMIT 1", [blocked_hash] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") - continue - - # DEBUG: print("DEBUG: Updating domain: ", searchres[0]) - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url) - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url) - - blocking = blocked if blocked.count("*") <= 1 else blocked_hash - # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'") - - if not fba.is_instance_blocked(blocker, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) - fba.block_instance(blocker, blocking, reason, block_level) - - if block_level == "reject": - blockdict.append({ - "blocked": blocked, - "reason" : reason - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for 
blocker='{blocker}',blocking='{blocking}' ...") - fba.update_last_seen(blocker, blocking, block_level) - fba.update_block_reason(reason, blocker, blocking, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe": - print("INFO: blocker:", blocker) + print(f"INFO: blocker='{blocker}',software='{software}'") + mastodon.fetch_blocks(blocker, software, origin, nodeinfo_url) + elif software == "friendica" or software == "misskey": + print(f"INFO: blocker='{blocker}',software='{software}'") try: if software == "friendica": - json = fba.get_friendica_blocks(blocker) + json = fba.fetch_friendica_blocks(blocker) elif software == "misskey": - json = fba.get_misskey_blocks(blocker) - elif software == "bookwyrm": - print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker) - #json = fba.get_bookwyrm_blocks(blocker) - continue - elif software == "takahe": - print("WARNING: takahe is not fully supported for fetching blacklist!", blocker) - #json = fba.get_takahe_blocks(blocker) - continue + json = fba.fetch_misskey_blocks(blocker) print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...") for block_level, blocks in json.items(): @@ -531,7 +236,7 @@ def fetch_blocks(args: argparse.Namespace): except Exception as e: print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") elif software == "gotosocial": - print("INFO: blocker:", blocker) + print(f"INFO: blocker='{blocker}',software='{software}'") try: # Blocks federation = fba.get_response(blocker, f"{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json() diff --git a/fba/fba.py b/fba/fba.py index 
c1c47e9..16c07d1 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -31,6 +31,9 @@ from fba import cache from fba import config from fba import instances +from fba.network import lemmy +from fba.network import misskey + # Don't check these, known trolls/flooders/testing/developing blacklist = [ # Floods network with fake nodes as "research" project @@ -76,32 +79,6 @@ api_headers = { "Content-Type": "application/json", } -language_mapping = { - # English -> English - "Silenced instances" : "Silenced servers", - "Suspended instances" : "Suspended servers", - "Limited instances" : "Limited servers", - "Filtered media" : "Filtered media", - # Mappuing German -> English - "Gesperrte Server" : "Suspended servers", - "Gefilterte Medien" : "Filtered media", - "Stummgeschaltete Server" : "Silenced servers", - # Japanese -> English - "停止済みのサーバー" : "Suspended servers", - "制限中のサーバー" : "Limited servers", - "メディアを拒否しているサーバー": "Filtered media", - "サイレンス済みのサーバー" : "Silenced servers", - # ??? -> English - "שרתים מושעים" : "Suspended servers", - "מדיה מסוננת" : "Filtered media", - "שרתים מוגבלים" : "Silenced servers", - # French -> English - "Serveurs suspendus" : "Suspended servers", - "Médias filtrés" : "Filtered media", - "Serveurs limités" : "Limited servers", - "Serveurs modérés" : "Limited servers", -} - # URL for fetching peers get_peers_url = "/api/v1/instance/peers" @@ -481,163 +458,18 @@ def get_peers(domain: str, software: str) -> list: elif type(software) != str and software != None: raise ValueError(f"software[]={type(software)} is not 'str'") - peers = list() - if software == "misskey": - # DEBUG: print(f"DEBUG: domain='{domain}' is misskey, sending API POST request ...") - offset = 0 - step = config.get("misskey_limit") - - # iterating through all "suspended" (follow-only in its terminology) - # instances page-by-page, since that troonware doesn't support - # sending them all at once - while True: - # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...") - 
if offset == 0: - fetched = post_json_api(domain, "/api/federation/instances", json.dumps({ - "sort" : "+pubAt", - "host" : None, - "limit": step - }), { - "Origin": domain - }) - else: - fetched = post_json_api(domain, "/api/federation/instances", json.dumps({ - "sort" : "+pubAt", - "host" : None, - "limit" : step, - "offset": offset - 1 - }), { - "Origin": domain - }) - - # DEBUG: print(f"DEBUG: fetched()={len(fetched)}") - if len(fetched) == 0: - # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) - break - elif len(fetched) != config.get("misskey_limit"): - # DEBUG: print(f"DEBUG: Fetched '{len(fetched)}' row(s) but expected: '{config.get('misskey_limit')}'") - offset = offset + (config.get("misskey_limit") - len(fetched)) - else: - # DEBUG: print("DEBUG: Raising offset by step:", step) - offset = offset + step - - # Check records - # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]={type(fetched)}") - if isinstance(fetched, dict) and "error" in fetched and "message" in fetched["error"]: - print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}") - update_last_error(domain, fetched["error"]["message"]) - break - - already = 0 - for row in fetched: - # DEBUG: print(f"DEBUG: row():{len(row)}") - if not "host" in row: - print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'") - continue - elif type(row["host"]) != str: - print(f"WARNING: row[host][]={type(row['host'])} is not 'str'") - continue - elif is_blacklisted(row["host"]): - # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. 
domain='{domain}'") - continue - elif row["host"] in peers: - # DEBUG: print(f"DEBUG: Not adding row[host]='{row['host']}', already found.") - already = already + 1 - continue - - # DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'") - peers.append(row["host"]) - - if already == len(fetched): - print(f"WARNING: Host returned same set of '{already}' instances, aborting loop!") - break - - # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - instances.set("total_peers", domain, len(peers)) - - # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") - update_last_instance_fetch(domain) - - # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) - return peers + print(f"DEBUG: Invoking misskey.get_peers({domain}) ...") + return misskey.get_peers(domain) elif software == "lemmy": - # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...") - try: - response = get_response(domain, "/api/v3/site", api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) - - data = json_from_response(response) - - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'") - if not response.ok or response.status_code >= 400: - print("WARNING: Could not reach any JSON API:", domain) - update_last_error(domain, response) - elif response.ok and isinstance(data, list): - # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'") - sys.exit(255) - elif "federated_instances" in data: - # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'") - peers = peers + add_peers(data["federated_instances"]) - # DEBUG: print("DEBUG: Added instance(s) to peers") - else: - print("WARNING: JSON response does not contain 'federated_instances':", domain) - update_last_error(domain, response) - - except BaseException as e: - print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'") - - # DEBUG: 
print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - instances.set("total_peers", domain, len(peers)) - - # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") - update_last_instance_fetch(domain) - - # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) - return peers + print(f"DEBUG: Invoking lemmy.get_peers({domain}) ...") + return lemmy.get_peers(domain) elif software == "peertube": - # DEBUG: print(f"DEBUG: domain='{domain}' is a PeerTube, fetching JSON ...") - - start = 0 - for mode in ["followers", "following"]: - # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'") - while True: - try: - response = get_response(domain, "/api/v1/server/{mode}?start={start}&count=100", headers, (config.get("connection_timeout"), config.get("read_timeout"))) - - data = json_from_response(response) - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'") - if response.ok and isinstance(data, dict): - # DEBUG: print("DEBUG: Success, data:", len(data)) - if "data" in data: - # DEBUG: print(f"DEBUG: Found {len(data['data'])} record(s).") - for record in data["data"]: - # DEBUG: print(f"DEBUG: record()={len(record)}") - if mode in record and "host" in record[mode]: - # DEBUG: print(f"DEBUG: Found host={record[mode]['host']}, adding ...") - peers.append(record[mode]["host"]) - else: - print(f"WARNING: record from '{domain}' has no '{mode}' or 'host' record: {record}") - - if len(data["data"]) < 100: - # DEBUG: print("DEBUG: Reached end of JSON response:", domain) - break - - # Continue with next row - start = start + 100 - - except BaseException as e: - print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'") - - # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") - instances.set("total_peers", domain, len(peers)) - - # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") - 
update_last_instance_fetch(domain) - - # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) - return peers - - # DEBUG: print(f"DEBUG: Fetching get_peers_url='{get_peers_url}' from '{domain}' ...") + print(f"DEBUG: Invoking peertube.get_peers({domain}) ...") + return peertube.get_peers(domain) + + # DEBUG: print(f"DEBUG: Fetching get_peers_url='{get_peers_url}' from '{domain}',software='{software}' ...") + peers = list() try: response = get_response(domain, get_peers_url, api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) @@ -1226,62 +1058,7 @@ def send_bot_post(instance: str, blocks: dict): return True -def get_mastodon_blocks(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if type(domain) != str: - raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") - elif domain == "": - raise ValueError(f"Parameter 'domain' cannot be empty") - - # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain) - blocks = { - "Suspended servers": [], - "Filtered media" : [], - "Limited servers" : [], - "Silenced servers" : [], - } - - try: - doc = bs4.BeautifulSoup( - get_response(domain, "/about/more", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, - "html.parser", - ) - except BaseException as e: - print("ERROR: Cannot fetch from domain:", domain, e) - update_last_error(domain, e) - return {} - - for header in doc.find_all("h3"): - header_text = tidyup_reason(header.text) - - # DEBUG: print(f"DEBUG: header_text='{header_text}'") - if header_text in language_mapping: - # DEBUG: print(f"DEBUG: header_text='{header_text}'") - header_text = language_mapping[header_text] - else: - print(f"WARNING: header_text='{header_text}' not found in language mapping table") - - if header_text in blocks or header_text.lower() in blocks: - # replaced find_next_siblings with find_all_next to account for instances that e.g. 
hide lists in dropdown menu - for line in header.find_all_next("table")[0].find_all("tr")[1:]: - blocks[header_text].append( - { - "domain": tidyup_domain(line.find("span").text), - "hash" : tidyup_domain(line.find("span")["title"][9:]), - "reason": tidyup_domain(line.find_all("td")[1].text), - } - ) - else: - print(f"WARNING: header_text='{header_text}' not found in blocks()={len(blocks)}") - - # DEBUG: print("DEBUG: Returning blocks for domain:", domain) - return { - "reject" : blocks["Suspended servers"], - "media_removal" : blocks["Filtered media"], - "followers_only": blocks["Limited servers"] + blocks["Silenced servers"], - } - -def get_friendica_blocks(domain: str) -> dict: +def fetch_friendica_blocks(domain: str) -> dict: # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") @@ -1320,7 +1097,7 @@ def get_friendica_blocks(domain: str) -> dict: "reject": blocks } -def get_misskey_blocks(domain: str) -> dict: +def fetch_misskey_blocks(domain: str) -> dict: # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") diff --git a/fba/network/__init__.py b/fba/network/__init__.py new file mode 100644 index 0000000..31e5992 --- /dev/null +++ b/fba/network/__init__.py @@ -0,0 +1,7 @@ +__all__ = [ + 'lemmy', + 'mastodon', + 'misskey', + 'peertube', + 'pleroma', +] diff --git a/fba/network/lemmy.py b/fba/network/lemmy.py new file mode 100644 index 0000000..05c8df0 --- /dev/null +++ b/fba/network/lemmy.py @@ -0,0 +1,64 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any 
later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import sys + +from fba import config +from fba import fba +from fba import instances + +def get_peers(domain: str) -> list: + # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software={software} - CALLED!") + if type(domain) != str: + raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"Parameter 'domain' cannot be empty") + elif type(software) != str and software != None: + raise ValueError(f"software[]={type(software)} is not 'str'") + + # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...") + peers = list() + try: + response = fba.get_response(domain, "/api/v3/site", api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) + + data = fba.json_from_response(response) + + # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'") + if not response.ok or response.status_code >= 400: + print("WARNING: Could not reach any JSON API:", domain) + fba.update_last_error(domain, response) + elif response.ok and isinstance(data, list): + print(f"UNSUPPORTED: domain='{domain}' returned a list: '{data}'") + sys.exit(255) + elif "federated_instances" in data: + # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'") + peers = peers + fba.add_peers(data["federated_instances"]) + # DEBUG: print("DEBUG: Added instance(s) to peers") + else: + print("WARNING: JSON response does not contain 'federated_instances':", domain) + fba.update_last_error(domain, response) + + except BaseException as e: + print(f"WARNING: 
Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'") + + # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") + instances.set("total_peers", domain, len(peers)) + + # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") + fba.update_last_instance_fetch(domain) + + # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) + return peers diff --git a/fba/network/mastodon.py b/fba/network/mastodon.py new file mode 100644 index 0000000..89bded8 --- /dev/null +++ b/fba/network/mastodon.py @@ -0,0 +1,259 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import bs4 +import validators + +from fba import config +from fba import fba + +language_mapping = { + # English -> English + "Silenced instances" : "Silenced servers", + "Suspended instances" : "Suspended servers", + "Limited instances" : "Limited servers", + "Filtered media" : "Filtered media", + # Mappuing German -> English + "Gesperrte Server" : "Suspended servers", + "Gefilterte Medien" : "Filtered media", + "Stummgeschaltete Server" : "Silenced servers", + # Japanese -> English + "停止済みのサーバー" : "Suspended servers", + "制限中のサーバー" : "Limited servers", + "メディアを拒否しているサーバー": "Filtered media", + "サイレンス済みのサーバー" : "Silenced servers", + # ??? 
-> English + "שרתים מושעים" : "Suspended servers", + "מדיה מסוננת" : "Filtered media", + "שרתים מוגבלים" : "Silenced servers", + # French -> English + "Serveurs suspendus" : "Suspended servers", + "Médias filtrés" : "Filtered media", + "Serveurs limités" : "Limited servers", + "Serveurs modérés" : "Limited servers", +} + +def fetch_blocks_from_about(domain: str) -> dict: + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if type(domain) != str: + raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"Parameter 'domain' cannot be empty") + + # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain) + blocks = { + "Suspended servers": [], + "Filtered media" : [], + "Limited servers" : [], + "Silenced servers" : [], + } + + try: + doc = bs4.BeautifulSoup( + fba.get_response(domain, "/about/more", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, + "html.parser", + ) + except BaseException as e: + print("ERROR: Cannot fetch from domain:", domain, e) + fba.update_last_error(domain, e) + return {} + + for header in doc.find_all("h3"): + header_text = fba.tidyup_reason(header.text) + + # DEBUG: print(f"DEBUG: header_text='{header_text}'") + if header_text in language_mapping: + # DEBUG: print(f"DEBUG: header_text='{header_text}'") + header_text = language_mapping[header_text] + else: + print(f"WARNING: header_text='{header_text}' not found in language mapping table") + + if header_text in blocks or header_text.lower() in blocks: + # replaced find_next_siblings with find_all_next to account for instances that e.g. 
hide lists in dropdown menu + for line in header.find_all_next("table")[0].find_all("tr")[1:]: + blocks[header_text].append( + { + "domain": fba.tidyup_domain(line.find("span").text), + "hash" : fba.tidyup_domain(line.find("span")["title"][9:]), + "reason": fba.tidyup_domain(line.find_all("td")[1].text), + } + ) + else: + print(f"WARNING: header_text='{header_text}' not found in blocks()={len(blocks)}") + + # DEBUG: print("DEBUG: Returning blocks for domain:", domain) + return { + "reject" : blocks["Suspended servers"], + "media_removal" : blocks["Filtered media"], + "followers_only": blocks["Limited servers"] + blocks["Silenced servers"], + } + +def fetch_blocks(domain: str, software: str, origin: str, nodeinfo_url: str): + print(f"DEBUG: domain='{domain}',software='{software}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") + if type(domain) != str: + raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"Parameter 'domain' cannot be empty") + elif type(software) != str: + raise ValueError(f"Parameter software[]={type(software)} is not 'str'") + elif software == "": + raise ValueError(f"Parameter 'software' cannot be empty") + elif type(origin) != str and origin != None: + raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'") + elif origin == "": + raise ValueError(f"Parameter 'origin' cannot be empty") + elif type(nodeinfo_url) != str: + raise ValueError(f"Parameter nodeinfo_url[]={type(nodeinfo_url)} is not 'str'") + elif nodeinfo_url == "": + raise ValueError(f"Parameter 'nodeinfo_url' cannot be empty") + + try: + # json endpoint for newer mastodongs + blockdict = list() + try: + json = { + "reject" : [], + "media_removal" : [], + "followers_only": [], + "report_removal": [] + } + + # handling CSRF, I've saw at least one server requiring it to access the endpoint + # DEBUG: print("DEBUG: Fetching meta:", domain) + meta = bs4.BeautifulSoup( + fba.get_response(domain, "/", 
fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, + "html.parser", + ) + try: + csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] + # DEBUG: print("DEBUG: Adding CSRF token:", domain, csrf) + reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} + except BaseException as e: + # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", domain, e) + reqheaders = fba.api_headers + + # DEBUG: print("DEBUG: Querying API domain_blocks:", domain) + blocks = fba.get_response(domain, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json() + + print(f"INFO: Checking {len(blocks)} entries from domain='{domain}',software='{software}' ...") + for block in blocks: + entry = { + 'domain': block['domain'], + 'hash' : block['digest'], + 'reason': block['comment'] + } + + # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) + if block['severity'] == 'suspend': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['reject'].append(entry) + elif block['severity'] == 'silence': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['followers_only'].append(entry) + elif block['severity'] == 'reject_media': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['media_removal'].append(entry) + elif block['severity'] == 'reject_reports': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['report_removal'].append(entry) + else: + print("WARNING: Unknown severity:", block['severity'], block['domain']) + + except BaseException as e: + # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: domain='{domain}',exception[{type(e)}]={str(e)}") + json = fetch_blocks_from_about(domain) + + print(f"INFO: Checking 
{len(json.items())} entries from domain='{domain}',software='{software}' ...") + for block_level, blocks in json.items(): + # DEBUG: print("DEBUG: domain,block_level,blocks():", domain, block_level, len(blocks)) + block_level = fba.tidyup_domain(block_level) + + # DEBUG: print("DEBUG: AFTER-block_level:", block_level) + if block_level == "": + print("WARNING: block_level is empty, domain:", domain) + continue + + # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from domain='{domain}',software='{software}',block_level='{block_level}' ...") + for block in blocks: + blocked, blocked_hash, reason = block.values() + # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER-blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty:", domain) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # Doing the hash search for instance names as well to tidy up DB + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? 
LIMIT 1", [blocked_hash] + ) + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") + continue + + # DEBUG: print("DEBUG: Updating domain: ", searchres[0]) + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + fba.add_instance(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain) + fba.add_instance(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) + + blocking = blocked if blocked.count("*") <= 1 else blocked_hash + # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'") + + if not fba.is_instance_blocked(domain, blocked, block_level): + # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) + fba.block_instance(domain, blocking, reason, block_level) + + if block_level == "reject": + blockdict.append({ + "blocked": blocked, + "reason" : reason + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' 
...") + fba.update_last_seen(domain, blocking, block_level) + fba.update_block_reason(reason, domain, blocking, block_level) + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + except Exception as e: + print(f"ERROR: domain='{domain}',software='{software}',exception[{type(e)}]:'{str(e)}'") + + # DEBUG: print("DEBUG: EXIT!") diff --git a/fba/network/misskey.py b/fba/network/misskey.py new file mode 100644 index 0000000..5a90d37 --- /dev/null +++ b/fba/network/misskey.py @@ -0,0 +1,107 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
import json

from fba import config
from fba import fba
from fba import instances


def get_peers(domain: str) -> list:
    """Collect the peer host names reported by a Misskey instance.

    The instance list is requested page by page with POST requests to
    ``/api/federation/instances``; the page size is the configured
    ``misskey_limit``. Fetching stops on an empty page, on an error
    document, or when a page contains only hosts that were already seen.

    Afterwards the peer count is stored via ``instances.set()`` and
    ``fba.update_last_instance_fetch()`` is called for ``domain``.

    Args:
        domain: Host name of the Misskey instance to query.

    Returns:
        List of unique, non-blacklisted host names (may be empty).

    Raises:
        ValueError: If ``domain`` is not a string or is empty.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    # DEBUG: print(f"DEBUG: domain='{domain}' is misskey, sending API POST request ...")
    peers = list()
    seen = set()    # mirrors `peers` for O(1) duplicate checks
    offset = 0
    step = config.get("misskey_limit")

    # The endpoint only returns a limited number of instances per request,
    # so the whole list has to be fetched page by page.
    while True:
        # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...")
        payload = {
            "sort" : "+pubAt",
            "host" : None,
            "limit": step,
        }
        if offset != 0:
            # The first request is sent without an offset
            payload["offset"] = offset - 1

        fetched = fba.post_json_api(domain, "/api/federation/instances", json.dumps(payload), {
            "Origin": domain
        })

        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]={type(fetched)}")
        if isinstance(fetched, dict) and "error" in fetched and "message" in fetched["error"]:
            # An error document instead of a list of rows
            print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
            fba.update_last_error(domain, fetched["error"]["message"])
            break
        elif len(fetched) == 0:
            # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
            break
        elif len(fetched) != step:
            # NOTE(review): a short page advances the offset by the number of
            # *missing* rows - kept as in the original, confirm this is intended.
            # DEBUG: print(f"DEBUG: Fetched '{len(fetched)}' row(s) but expected: '{step}'")
            offset = offset + (step - len(fetched))
        else:
            # DEBUG: print("DEBUG: Raising offset by step:", step)
            offset = offset + step

        already = 0
        for row in fetched:
            # DEBUG: print(f"DEBUG: row():{len(row)}")
            if not "host" in row:
                print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
                continue
            elif not isinstance(row["host"], str):
                print(f"WARNING: row[host][]={type(row['host'])} is not 'str'")
                continue
            elif fba.is_blacklisted(row["host"]):
                # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}'")
                continue
            elif row["host"] in seen:
                # DEBUG: print(f"DEBUG: Not adding row[host]='{row['host']}', already found.")
                already = already + 1
                continue

            # DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'")
            seen.add(row["host"])
            peers.append(row["host"])

        if already == len(fetched):
            # The host keeps sending the same page, stop to avoid an endless loop
            print(f"WARNING: Host returned same set of '{already}' instances, aborting loop!")
            break

    # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
    instances.set("total_peers", domain, len(peers))

    # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
    fba.update_last_instance_fetch(domain)

    # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
    return peers

# diff --git a/fba/network/peertube.py b/fba/network/peertube.py
# new file mode 100644
# index 0000000..1906abc
# --- /dev/null
# +++ b/fba/network/peertube.py
# @@ -0,0 +1,68 @@
# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .

from fba import config
from fba import fba
from fba import instances


def get_peers(domain: str) -> list:
    """Collect the peer host names reported by a PeerTube instance.

    Both ``/api/v1/server/followers`` and ``/api/v1/server/following`` are
    read in pages of 100 records. Paging for one endpoint ends on a short
    page, on a response that is not usable, or on an exception.

    Afterwards the peer count is stored via ``instances.set()`` and
    ``fba.update_last_instance_fetch()`` is called for ``domain``.

    Args:
        domain: Host name of the PeerTube instance to query.

    Returns:
        List of host names found in the fetched records (may be empty).

    Raises:
        ValueError: If ``domain`` is not a string or is empty.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    # DEBUG: print(f"DEBUG: domain='{domain}' is a PeerTube, fetching JSON ...")
    peers = list()
    count = 100

    # Key inside each follow record that holds the remote actor.
    # NOTE(review): based on the PeerTube REST API follow object
    # ("follower"/"following" actors) - confirm against a live instance.
    record_keys = {
        "followers": "follower",
        "following": "following",
    }

    for mode in ["followers", "following"]:
        # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'")
        key = record_keys[mode]

        # Each endpoint is paged independently, so restart at the first record
        start = 0

        while True:
            try:
                # NOTE(review): fba.api_headers is assumed to be the right
                # header set for this JSON endpoint - confirm.
                response = fba.get_response(
                    domain,
                    f"/api/v1/server/{mode}?start={start}&count={count}",
                    fba.api_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                )
                data = fba.json_from_response(response)
            except Exception as e:
                # Retrying the very same request would loop forever, give up on this mode
                print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
                break

            # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'")
            if not (response.ok and isinstance(data, dict) and "data" in data):
                print(f"WARNING: Unusable response from domain='{domain}',mode='{mode}',response.status_code='{response.status_code}'")
                break

            # DEBUG: print(f"DEBUG: Found {len(data['data'])} record(s).")
            for record in data["data"]:
                # DEBUG: print(f"DEBUG: record()={len(record)}")
                if key in record and "host" in record[key]:
                    # DEBUG: print(f"DEBUG: Found host={record[key]['host']}, adding ...")
                    peers.append(record[key]["host"])
                else:
                    print(f"WARNING: record from '{domain}' has no '{key}' or 'host' record: {record}")

            if len(data["data"]) < count:
                # DEBUG: print("DEBUG: Reached end of JSON response:", domain)
                break

            # Continue with next page
            start = start + count

    # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
    instances.set("total_peers", domain, len(peers))

    # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
    fba.update_last_instance_fetch(domain)

    # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
    return peers

# diff --git a/fba/network/pleroma.py b/fba/network/pleroma.py
# new file mode 100644
# index 0000000..5cfc5ba
# --- /dev/null
# +++ b/fba/network/pleroma.py
# @@ -0,0 +1,203 @@
# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
import inspect
import validators

from fba import fba


def _resolve_blocked(blocked: str, domain: str, block_level: str, software: str, nodeinfo_url: str, command: str):
    """Tidy up, de-obfuscate and register one blocked domain.

    Returns the usable domain name, or None when the entry must be skipped
    (empty, blacklisted, not resolvable or not a valid domain name).
    """
    # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
    blocked = fba.tidyup_domain(blocked)
    # DEBUG: print("DEBUG: AFTER blocked:", blocked)

    if blocked == "":
        print("WARNING: blocked is empty after fba.tidyup_domain():", domain, block_level)
        return None
    elif fba.is_blacklisted(blocked):
        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
        return None
    elif blocked.count("*") > 1:
        # Obscured domain without hash: guess it from the already known
        # instances, every '*' stands for exactly one character.
        fba.cursor.execute(
            "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
        )
        searchres = fba.cursor.fetchone()
        # DEBUG: print("DEBUG: searchres[]:", type(searchres))

        if searchres is None:
            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
            return None

        # Only local names are rebound here, so the looked-up values
        # cannot leak into the handling of the next entry.
        blocked = searchres[0]
        nodeinfo_url = searchres[1]
        # DEBUG: print("DEBUG: Looked up domain:", blocked)

    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
    if not validators.domain(blocked):
        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
        return None
    elif not fba.is_instance_registered(blocked):
        # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',nodeinfo_url='{nodeinfo_url}'")
        fba.add_instance(blocked, domain, command, nodeinfo_url)

    return blocked


def fetch_blocks(domain: str, software: str, origin: str, nodeinfo_url: str):
    """Record the block list a Pleroma instance publishes in its nodeinfo.

    The ``metadata.federation`` section of the nodeinfo is read. Entries of
    ``mrf_simple`` (plus ``quarantined_instances``, if present) are stored
    as blocks, entries of ``mrf_simple_info`` (plus
    ``quarantined_instances_info``, if present) update the block reasons.
    Any exception raised while processing is printed, not propagated.

    Args:
        domain: Host name of the blocking Pleroma instance.
        software: Software name of the instance, used in log messages.
        origin: Origin of the instance, a non-empty string or None.
        nodeinfo_url: URL of the instance's nodeinfo document.

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',software='{software}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif not isinstance(software, str):
        raise ValueError(f"Parameter software[]={type(software)} is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' cannot be empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' cannot be empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]={type(nodeinfo_url)} is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' cannot be empty")

    # Name of this function, handed to fba.add_instance() for newly found instances
    command = inspect.currentframe().f_code.co_name

    try:
        # Blocks
        # NOTE(review): blockdict is filled but neither returned nor used
        # otherwise in this function - confirm whether the caller needs it.
        blockdict = list()
        nodeinfo = fba.fetch_nodeinfo(domain, nodeinfo_url)

        if nodeinfo is None:
            print("WARNING: Could not fetch nodeinfo from domain:", domain)
            return
        elif not "metadata" in nodeinfo:
            print(f"WARNING: json()={len(nodeinfo)} does not have key 'metadata', domain='{domain}'")
            return
        elif not "federation" in nodeinfo["metadata"]:
            print(f"WARNING: json()={len(nodeinfo['metadata'])} does not have key 'federation', domain='{domain}'")
            return

        # DEBUG: print("DEBUG: Updating nodeinfo:", domain)
        fba.update_last_nodeinfo(domain)

        federation = nodeinfo["metadata"]["federation"]

        # NOTE(review): this returns whenever the key exists, regardless of
        # its value and of any block list next to it - confirm this is intended.
        if "enabled" in federation:
            # DEBUG: print("DEBUG: Instance has no block list to analyze:", domain)
            return

        if "mrf_simple" in federation:
            block_levels = {
                **federation["mrf_simple"],
                # Not every instance publishes its quarantined instances
                "quarantined_instances": federation.get("quarantined_instances", []),
            }

            for block_level, blocks in block_levels.items():
                # DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks))
                block_level = fba.tidyup_domain(block_level)
                # DEBUG: print("DEBUG: AFTER block_level:", block_level)

                if block_level == "":
                    print("WARNING: block_level is now empty!")
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from domain='{domain}',software='{software}',block_level='{block_level}' ...")
                for blocked in blocks:
                    blocked = _resolve_blocked(blocked, domain, block_level, software, nodeinfo_url, command)
                    if blocked is None:
                        continue

                    if not fba.is_instance_blocked(domain, blocked, block_level):
                        # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                        fba.block_instance(domain, blocked, "unknown", block_level)

                        if block_level == "reject":
                            # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : None
                            })
                    else:
                        # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
                        fba.update_last_seen(domain, blocked, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

        # Reasons
        if "mrf_simple_info" in federation:
            # DEBUG: print("DEBUG: Found mrf_simple_info:", domain)
            reason_levels = {
                **federation["mrf_simple_info"],
                **federation.get("quarantined_instances_info", {}),
            }

            for block_level, info in reason_levels.items():
                # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
                block_level = fba.tidyup_domain(block_level)
                # DEBUG: print("DEBUG: AFTER block_level:", block_level)

                if block_level == "":
                    print("WARNING: block_level is now empty!")
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from domain='{domain}',software='{software}',block_level='{block_level}' ...")
                for blocked, reason in info.items():
                    # Same domain guess as above, but for the reasons field
                    blocked = _resolve_blocked(blocked, domain, block_level, software, nodeinfo_url, command)
                    if blocked is None:
                        continue

                    # DEBUG: print("DEBUG: Updating block reason:", domain, blocked, reason["reason"])
                    fba.update_block_reason(reason["reason"], domain, blocked, block_level)

                    # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}")
                    for entry in blockdict:
                        if entry["blocked"] == blocked:
                            # DEBUG: print("DEBUG: Updating entry reason:", blocked)
                            entry["reason"] = reason["reason"]

        fba.connection.commit()
    except Exception as e:
        print(f"ERROR: domain='{domain}',software='{software}',exception[{type(e)}]:'{str(e)}'")

    # DEBUG: print("DEBUG: EXIT!")

# --
# 2.39.5