X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fetch_blocks.py;h=f79a206d830214cec6f273f0b0d1716d99d7eacd;hb=5bd02655d8ddb4c6281e2a7b9bee1b022e38fe5b;hp=187d23ece2441b7a9a51bdd67577d90f64defba4;hpb=65ef38213b1b0bacb393b01ddce7bda2358a2d15;p=fba.git diff --git a/fetch_blocks.py b/fetch_blocks.py index 187d23e..f79a206 100644 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -1,12 +1,42 @@ -from requests import get +from reqto import get +from reqto import post from hashlib import sha256 import sqlite3 from bs4 import BeautifulSoup +from json import dumps +from json import loads +import re +from time import time +import itertools + +with open("config.json") as f: + config = loads(f.read()) headers = { - "user-agent": "fedi-block-api (https://gitlab.com/EnjuAihara/fedi-block-api)" + "user-agent": config["useragent"] } +def send_bot_post(instance: str, blocks: dict): + message = instance + " has blocked the following instances:\n\n" + truncated = False + if len(blocks) > 20: + truncated = True + blocks = blocks[0 : 19] + for block in blocks: + if block["reason"] == None or block["reason"] == '': + message = message + block["blocked"] + " with unspecified reason\n" + else: + message = message + block["blocked"] + ' for "' + block["reason"] + '"\n' + if truncated: + message = message + "(the list has been truncated to the first 20 entries)" + + botheaders = {**headers, **{"Authorization": "Bearer " + config["bot_token"]}} + req = post(f"{config['bot_instance']}/api/v1/statuses", + data={"status":message, "visibility":config['bot_visibility'], "content_type":"text/plain"}, + headers=botheaders, timeout=10).json() + print(req) + return True + def get_mastodon_blocks(domain: str) -> dict: blocks = { "Suspended servers": [], @@ -15,6 +45,23 @@ def get_mastodon_blocks(domain: str) -> dict: "Silenced servers": [], } + translations = { + "Silenced instances": "Silenced servers", + "Suspended instances": "Suspended servers", + "Gesperrte Server": "Suspended servers", + "Gefilterte Medien": "Filtered media", + "Stummgeschaltete Server": "Silenced servers", + "停止済みのサーバー": "Suspended servers", + "メディアを拒否しているサーバー": "Filtered media", + "サイレンス済みのサーバー": "Silenced servers", + "שרתים מושעים": "Suspended servers", + "מדיה מסוננת": "Filtered media", + "שרתים מוגבלים": "Silenced servers", + "Serveurs suspendus": "Suspended servers", + "Médias filtrés": "Filtered media", + "Serveurs limités": "Silenced servers", + } + try: doc = BeautifulSoup( get(f"https://{domain}/about/more", headers=headers, timeout=5).text, @@ -22,11 +69,15 @@ def get_mastodon_blocks(domain: str) -> dict: ) except: return {} - + for header in doc.find_all("h3"): - for line in header.find_next_siblings("table")[0].find_all("tr")[1:]: - if header.text in blocks: - blocks[header.text].append( + header_text = header.text + if header_text in translations: + header_text = translations[header_text] + if header_text in blocks: + # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu + for line in header.find_all_next("table")[0].find_all("tr")[1:]: + blocks[header_text].append( { "domain": line.find("span").text, "hash": line.find("span")["title"][9:], @@ -36,19 +87,119 @@ def get_mastodon_blocks(domain: str) -> dict: return { "reject": blocks["Suspended servers"], "media_removal": blocks["Filtered media"], - "federated_timeline_removal": blocks["Limited servers"] + blocks["Silenced servers"], + "followers_only": blocks["Limited servers"] + + blocks["Silenced servers"], } +def get_friendica_blocks(domain: str) -> dict: + blocks = [] + + try: + doc = BeautifulSoup( + get(f"https://{domain}/friendica", headers=headers, timeout=5).text, + "html.parser", + ) + except: + return {} + + blocklist = doc.find(id="about_blocklist") + for line in blocklist.find("table").find_all("tr")[1:]: + blocks.append( + { + "domain": line.find_all("td")[0].text.strip(), + "reason": line.find_all("td")[1].text.strip() + } + ) + + return { + "reject": blocks + } + +def get_pisskey_blocks(domain: str) -> dict: + blocks = { + "suspended": [], + "blocked": [] + } + + try: + counter = 0 + step = 99 + while True: + # iterating through all "suspended" (follow-only in its terminology) instances page-by-page, since that troonware doesn't support sending them all at once + try: + if counter == 0: + doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"suspended":True,"limit":step}), headers=headers, timeout=5).json() + if doc == []: raise + else: + doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"suspended":True,"limit":step,"offset":counter-1}), headers=headers, timeout=5).json() + if doc == []: raise + for instance in doc: + # just in case + if instance["isSuspended"]: + blocks["suspended"].append( + { + "domain": instance["host"], + # no reason field, nothing + "reason": "" + } + ) + counter = counter + step + except: + counter = 0 + break + + while True: + # same shit, different asshole ("blocked" aka full suspend) + try: + if counter == 0: + doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"blocked":True,"limit":step}), headers=headers, timeout=5).json() + if doc == []: raise + else: + doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"blocked":True,"limit":step,"offset":counter-1}), headers=headers, timeout=5).json() + if doc == []: raise + for instance in doc: + if instance["isBlocked"]: + blocks["blocked"].append( + { + "domain": instance["host"], + "reason": "" + } + ) + counter = counter + step + except: + counter = 0 + break + + return { + "reject": blocks["blocked"], + "followers_only": blocks["suspended"] + } + + except: + return {} + def get_hash(domain: str) -> str: return sha256(domain.encode("utf-8")).hexdigest() + def get_type(domain: str) -> str: try: res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5) + if res.status_code == 404: + res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5) if res.status_code == 404: res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5) + if res.ok and "text/html" in res.headers["content-type"]: + res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5) if res.ok: - return res.json()["software"]["name"] + if res.json()["software"]["name"] in ["akkoma", "rebased"]: + return "pleroma" + elif res.json()["software"]["name"] in ["hometown", "ecko"]: + return "mastodon" + elif res.json()["software"]["name"] in ["calckey", "groundpolis", "foundkey", "cherrypick"]: + return "misskey" + else: + return res.json()["software"]["name"] elif res.status_code == 404: res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5) if res.ok: @@ -56,54 +207,352 @@ def get_type(domain: str) -> str: except: return None +def tidyup(domain: str) -> str: + # some retards put their blocks in variable case + domain = domain.lower() + # other retards put the port + domain = re.sub("\:\d+$", "", domain) + # bigger retards put the schema in their blocklist, sometimes even without slashes + domain = re.sub("^https?\:(\/*)", "", domain) + # and trailing slash + domain = re.sub("\/$", "", domain) + # and the @ + domain = re.sub("^\@", "", domain) + # the biggest retards of them all try to block individual users + domain = re.sub("(.+)\@", "", domain) + return domain + conn = sqlite3.connect("blocks.db") c = conn.cursor() -c.execute("select domain, software from instances where software in ('pleroma', 'mastodon')") +c.execute( + #"select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')" + "select domain, software from instances where domain = 'mstdn.social'" +) for blocker, software in c.fetchall(): + blockdict = [] + blocker = tidyup(blocker) if software == "pleroma": print(blocker) try: # Blocks - c.execute("delete from blocks where blocker = ?", (blocker,)) - federation = get(f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5).json()["metadata"]["federation"] + federation = get( + f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5 + ).json()["metadata"]["federation"] if "mrf_simple" in federation: - for block_level, blocks in (federation["mrf_simple"] | {"quarantined_instances": federation["quarantined_instances"]}).items(): + for block_level, blocks in ( + {**federation["mrf_simple"], + **{"quarantined_instances": federation["quarantined_instances"]}} + ).items(): for blocked in blocks: + blocked = tidyup(blocked) if blocked == "": continue - c.execute("select domain from instances where domain = ?", (blocked,)) + if blocked.count("*") > 1: + # -ACK!-oma also started obscuring domains without hash + c.execute( + "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),) + ) + searchres = c.fetchone() + if searchres != None: + blocked = searchres[0] + + c.execute( + "select domain from instances where domain = ?", (blocked,) + ) + if c.fetchone() == None: + c.execute( + "insert into instances select ?, ?, ?", + (blocked, get_hash(blocked), get_type(blocked)), + ) + timestamp = int(time()) + c.execute( + "select * from blocks where blocker = ? and blocked = ? and block_level = ?", + (blocker, blocked, block_level), + ) if c.fetchone() == None: - c.execute("insert into instances select ?, ?, ?", (blocked, get_hash(blocked), get_type(blocked))) - c.execute("insert into blocks select ?, ?, '', ?", (blocker, blocked, block_level)) + c.execute( + "insert into blocks select ?, ?, '', ?, ?, ?", + (blocker, blocked, block_level, timestamp, timestamp), + ) + if block_level == "reject": + blockdict.append( + { + "blocked": blocked, + "reason": None + }) + else: + c.execute( + "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?", + (timestamp, blocker, blocked, block_level) + ) conn.commit() # Reasons if "mrf_simple_info" in federation: - for block_level, info in (federation["mrf_simple_info"] | federation["quarantined_instances_info"] if "quarantined_instances_info" in federation else {}).items(): + for block_level, info in ( + {**federation["mrf_simple_info"], + **(federation["quarantined_instances_info"] + if "quarantined_instances_info" in federation + else {})} + ).items(): for blocked, reason in info.items(): - c.execute("update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ?", (reason["reason"], blocker, blocked, block_level)) + blocked = tidyup(blocked) + if blocked == "": + continue + if blocked.count("*") > 1: + # same domain guess as above, but for reasons field + c.execute( + "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),) + ) + searchres = c.fetchone() + if searchres != None: + blocked = searchres[0] + c.execute( + "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''", + (reason["reason"], blocker, blocked, block_level), + ) + for entry in blockdict: + if entry["blocked"] == blocked: + entry["reason"] = reason["reason"] + conn.commit() except Exception as e: print("error:", e, blocker) elif software == "mastodon": print(blocker) try: - c.execute("delete from blocks where blocker = ?", (blocker,)) - json = get_mastodon_blocks(blocker) - for block_level in json: - for blocked in json[block_level]: - if blocked["domain"].count("*") > 1: - # instance is censored, check if domain of hash is known, if not, insert the hash - c.execute("insert into blocks select ?, ifnull((select domain from instances where hash = ?), ?), ?, ?", (blocker, blocked["hash"], blocked["hash"], blocked['reason'], block_level)) - else: - # instance is not censored - c.execute("select domain from instances where domain = ?", (blocked["domain"],)) + # json endpoint for newer mastodongs + try: + json = { + "reject": [], + "media_removal": [], + "followers_only": [], + "report_removal": [] + } + + # handling CSRF, I've saw at least one server requiring it to access the endpoint + meta = BeautifulSoup( + get(f"https://{blocker}/about", headers=headers, timeout=5).text, + "html.parser", + ) + try: + csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] + reqheaders = {**headers, **{"x-csrf-token": csrf}} + except: + reqheaders = headers + + blocks = get( + f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5 + ).json() + for block in blocks: + entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']} + if block['severity'] == 'suspend': + json['reject'].append(entry) + elif block['severity'] == 'silence': + json['followers_only'].append(entry) + elif block['severity'] == 'reject_media': + json['media_removal'].append(entry) + elif block['severity'] == 'reject_reports': + json['report_removal'].append(entry) + except: + json = get_mastodon_blocks(blocker) + + for block_level, blocks in json.items(): + for instance in blocks: + blocked, blocked_hash, reason = instance.values() + blocked = tidyup(blocked) + if blocked.count("*") <= 1: + c.execute( + "select hash from instances where hash = ?", (blocked_hash,) + ) if c.fetchone() == None: - # if instance not known, add it - c.execute("insert into instances select ?, ?, ?", (blocked["domain"], get_hash(blocked["domain"]), get_type(blocked["domain"]))) - c.execute("insert into blocks select ?, ?, ?, ?", (blocker, blocked["domain"], blocked["reason"], block_level)) + c.execute( + "insert into instances select ?, ?, ?", + (blocked, get_hash(blocked), get_type(blocked)), + ) + else: + # Doing the hash search for instance names as well to tidy up DB + c.execute( + "select domain from instances where hash = ?", (blocked_hash,) + ) + searchres = c.fetchone() + if searchres != None: + blocked = searchres[0] + + timestamp = int(time()) + c.execute( + "select * from blocks where blocker = ? and blocked = ? and block_level = ?", + (blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level), + ) + if c.fetchone() == None: + c.execute( + "insert into blocks select ?, ?, ?, ?, ?, ?", + ( + blocker, + blocked if blocked.count("*") <= 1 else blocked_hash, + reason, + block_level, + timestamp, + timestamp, + ), + ) + if block_level == "reject": + blockdict.append( + { + "blocked": blocked, + "reason": reason + }) + else: + c.execute( + "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?", + (timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level), + ) + if reason != '': + c.execute( + "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''", + (reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level), + ) conn.commit() except Exception as e: print("error:", e, blocker) + elif software == "friendica" or software == "misskey": + print(blocker) + try: + if software == "friendica": + json = get_friendica_blocks(blocker) + elif software == "misskey": + json = get_pisskey_blocks(blocker) + for block_level, blocks in json.items(): + for instance in blocks: + blocked, reason = instance.values() + blocked = tidyup(blocked) + + if blocked.count("*") > 0: + # Some friendica servers also obscure domains without hash + c.execute( + "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),) + ) + searchres = c.fetchone() + if searchres != None: + blocked = searchres[0] + + if blocked.count("?") > 0: + # Some obscure them with question marks, not sure if that's dependent on version or not + c.execute( + "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("?", "_"),) + ) + searchres = c.fetchone() + if searchres != None: + blocked = searchres[0] + + c.execute( + "select domain from instances where domain = ?", (blocked,) + ) + if c.fetchone() == None: + c.execute( + "insert into instances select ?, ?, ?", + (blocked, get_hash(blocked), get_type(blocked)), + ) + + timestamp = int(time()) + c.execute( + "select * from blocks where blocker = ? and blocked = ?", + (blocker, blocked), + ) + if c.fetchone() == None: + c.execute( + "insert into blocks select ?, ?, ?, ?, ?, ?", + ( + blocker, + blocked, + reason, + block_level, + timestamp, + timestamp + ), + ) + if block_level == "reject": + blockdict.append( + { + "blocked": blocked, + "reason": reason + }) + else: + c.execute( + "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?", + (timestamp, blocker, blocked, block_level), + ) + if reason != '': + c.execute( + "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''", + (reason, blocker, blocked, block_level), + ) + conn.commit() + except Exception as e: + print("error:", e, blocker) + elif software == "gotosocial": + print(blocker) + try: + # Blocks + federation = get( + f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5 + ).json() + for peer in federation: + blocked = peer["domain"].lower() + + if blocked.count("*") > 0: + # GTS does not have hashes for obscured domains, so we have to guess it + c.execute( + "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),) + ) + searchres = c.fetchone() + if searchres != None: + blocked = searchres[0] + + c.execute( + "select domain from instances where domain = ?", (blocked,) + ) + if c.fetchone() == None: + c.execute( + "insert into instances select ?, ?, ?", + (blocked, get_hash(blocked), get_type(blocked)), + ) + c.execute( + "select * from blocks where blocker = ? and blocked = ? and block_level = ?", + (blocker, blocked, "reject"), + ) + timestamp = int(time()) + if c.fetchone() == None: + c.execute( + "insert into blocks select ?, ?, ?, ?, ?, ?", + (blocker, blocked, "", "reject", timestamp, timestamp), + ) + blockdict.append( + { + "blocked": blocked, + "reason": None + }) + else: + c.execute( + "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?", + (timestamp, blocker, blocked, "reject"), + ) + if "public_comment" in peer: + reason = peer["public_comment"] + c.execute( + "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''", + (reason, blocker, blocked, "reject"), + ) + for entry in blockdict: + if entry["blocked"] == blocked: + entry["reason"] = reason + conn.commit() + except Exception as e: + print("error:", e, blocker) + + if config["bot_enabled"] and len(blockdict) > 0: + send_bot_post(blocker, blockdict) + blockdict = [] + conn.close()