X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fetch_blocks.py;h=3338ab0cb31c4ab1832b3765e9146621928cee6c;hb=b9c85dd99459d91830fd7c065aabdb2984370d12;hp=f79a206d830214cec6f273f0b0d1716d99d7eacd;hpb=5bd02655d8ddb4c6281e2a7b9bee1b022e38fe5b;p=fba.git diff --git a/fetch_blocks.py b/fetch_blocks.py index f79a206..3338ab0 100644 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -1,243 +1,22 @@ -from reqto import get -from reqto import post -from hashlib import sha256 -import sqlite3 -from bs4 import BeautifulSoup -from json import dumps -from json import loads -import re -from time import time +import time +import bs4 +import fba import itertools +import re -with open("config.json") as f: - config = loads(f.read()) - -headers = { - "user-agent": config["useragent"] -} - -def send_bot_post(instance: str, blocks: dict): - message = instance + " has blocked the following instances:\n\n" - truncated = False - if len(blocks) > 20: - truncated = True - blocks = blocks[0 : 19] - for block in blocks: - if block["reason"] == None or block["reason"] == '': - message = message + block["blocked"] + " with unspecified reason\n" - else: - message = message + block["blocked"] + ' for "' + block["reason"] + '"\n' - if truncated: - message = message + "(the list has been truncated to the first 20 entries)" - - botheaders = {**headers, **{"Authorization": "Bearer " + config["bot_token"]}} - req = post(f"{config['bot_instance']}/api/v1/statuses", - data={"status":message, "visibility":config['bot_visibility'], "content_type":"text/plain"}, - headers=botheaders, timeout=10).json() - print(req) - return True - -def get_mastodon_blocks(domain: str) -> dict: - blocks = { - "Suspended servers": [], - "Filtered media": [], - "Limited servers": [], - "Silenced servers": [], - } - - translations = { - "Silenced instances": "Silenced servers", - "Suspended instances": "Suspended servers", - "Gesperrte Server": "Suspended servers", - "Gefilterte Medien": "Filtered media", - "Stummgeschaltete Server": "Silenced servers", - "停止済みのサーバー": "Suspended servers", - "メディアを拒否しているサーバー": "Filtered media", - "サイレンス済みのサーバー": "Silenced servers", - "שרתים מושעים": "Suspended servers", - "מדיה מסוננת": "Filtered media", - "שרתים מוגבלים": "Silenced servers", - "Serveurs suspendus": "Suspended servers", - "Médias filtrés": "Filtered media", - "Serveurs limités": "Silenced servers", - } - - try: - doc = BeautifulSoup( - get(f"https://{domain}/about/more", headers=headers, timeout=5).text, - "html.parser", - ) - except: - return {} - - for header in doc.find_all("h3"): - header_text = header.text - if header_text in translations: - header_text = translations[header_text] - if header_text in blocks: - # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu - for line in header.find_all_next("table")[0].find_all("tr")[1:]: - blocks[header_text].append( - { - "domain": line.find("span").text, - "hash": line.find("span")["title"][9:], - "reason": line.find_all("td")[1].text.strip(), - } - ) - return { - "reject": blocks["Suspended servers"], - "media_removal": blocks["Filtered media"], - "followers_only": blocks["Limited servers"] - + blocks["Silenced servers"], - } - -def get_friendica_blocks(domain: str) -> dict: - blocks = [] - - try: - doc = BeautifulSoup( - get(f"https://{domain}/friendica", headers=headers, timeout=5).text, - "html.parser", - ) - except: - return {} - - blocklist = doc.find(id="about_blocklist") - for line in blocklist.find("table").find_all("tr")[1:]: - blocks.append( - { - "domain": line.find_all("td")[0].text.strip(), - "reason": line.find_all("td")[1].text.strip() - } - ) - - return { - "reject": blocks - } - -def get_pisskey_blocks(domain: str) -> dict: - blocks = { - "suspended": [], - "blocked": [] - } - - try: - counter = 0 - step = 99 - while True: - # iterating through all "suspended" (follow-only in its terminology) instances page-by-page, since that troonware doesn't support sending them all at once - try: - if counter == 0: - doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"suspended":True,"limit":step}), headers=headers, timeout=5).json() - if doc == []: raise - else: - doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"suspended":True,"limit":step,"offset":counter-1}), headers=headers, timeout=5).json() - if doc == []: raise - for instance in doc: - # just in case - if instance["isSuspended"]: - blocks["suspended"].append( - { - "domain": instance["host"], - # no reason field, nothing - "reason": "" - } - ) - counter = counter + step - except: - counter = 0 - break - - while True: - # same shit, different asshole ("blocked" aka full suspend) - try: - if counter == 0: - doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"blocked":True,"limit":step}), headers=headers, timeout=5).json() - if doc == []: raise - else: - doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"blocked":True,"limit":step,"offset":counter-1}), headers=headers, timeout=5).json() - if doc == []: raise - for instance in doc: - if instance["isBlocked"]: - blocks["blocked"].append( - { - "domain": instance["host"], - "reason": "" - } - ) - counter = counter + step - except: - counter = 0 - break - - return { - "reject": blocks["blocked"], - "followers_only": blocks["suspended"] - } - - except: - return {} - -def get_hash(domain: str) -> str: - return sha256(domain.encode("utf-8")).hexdigest() - - -def get_type(domain: str) -> str: - try: - res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5) - if res.status_code == 404: - res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5) - if res.status_code == 404: - res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5) - if res.ok and "text/html" in res.headers["content-type"]: - res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5) - if res.ok: - if res.json()["software"]["name"] in ["akkoma", "rebased"]: - return "pleroma" - elif res.json()["software"]["name"] in ["hometown", "ecko"]: - return "mastodon" - elif res.json()["software"]["name"] in ["calckey", "groundpolis", "foundkey", "cherrypick"]: - return "misskey" - else: - return res.json()["software"]["name"] - elif res.status_code == 404: - res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5) - if res.ok: - return "mastodon" - except: - return None - -def tidyup(domain: str) -> str: - # some retards put their blocks in variable case - domain = domain.lower() - # other retards put the port - domain = re.sub("\:\d+$", "", domain) - # bigger retards put the schema in their blocklist, sometimes even without slashes - domain = re.sub("^https?\:(\/*)", "", domain) - # and trailing slash - domain = re.sub("\/$", "", domain) - # and the @ - domain = re.sub("^\@", "", domain) - # the biggest retards of them all try to block individual users - domain = re.sub("(.+)\@", "", domain) - return domain - -conn = sqlite3.connect("blocks.db") -c = conn.cursor() - -c.execute( - #"select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')" - "select domain, software from instances where domain = 'mstdn.social'" +fba.c.execute( + "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')" ) -for blocker, software in c.fetchall(): +for blocker, software in fba.c.fetchall(): + print("DEBUG: blocker,software:", blocker, software) blockdict = [] - blocker = tidyup(blocker) + blocker = fba.tidyup(blocker) if software == "pleroma": - print(blocker) + print("DEBUG: blocker:", blocker) try: # Blocks - federation = get( + federation = reqto.get( f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5 ).json()["metadata"]["federation"] if "mrf_simple" in federation: @@ -246,36 +25,41 @@ for blocker, software in c.fetchall(): **{"quarantined_instances": federation["quarantined_instances"]}} ).items(): for blocked in blocks: - blocked = tidyup(blocked) + blocked = fba.tidyup(blocked) + if blocked == "": + print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level) continue + if blocked.count("*") > 1: # -ACK!-oma also started obscuring domains without hash - c.execute( - "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),) + fba.c.execute( + "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),) ) - searchres = c.fetchone() + searchres = fba.c.fetchone() if searchres != None: blocked = searchres[0] - c.execute( - "select domain from instances where domain = ?", (blocked,) + fba.c.execute( + "SELECT domain FROM instances WHERE domain = ?", (blocked) ) - if c.fetchone() == None: - c.execute( - "insert into instances select ?, ?, ?", - (blocked, get_hash(blocked), get_type(blocked)), - ) - timestamp = int(time()) - c.execute( - "select * from blocks where blocker = ? and blocked = ? and block_level = ?", - (blocker, blocked, block_level), + + if fba.c.fetchone() == None: + print("DEBUG: Hash wasn't found, adding:", blocked) + fba.add_instance(blocked) + + timestamp = int(time.time()) + fba.c.execute( + "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?", + ( + blocker, + blocked, + block_level + ), ) - if c.fetchone() == None: - c.execute( - "insert into blocks select ?, ?, '', ?, ?, ?", - (blocker, blocked, block_level, timestamp, timestamp), - ) + if fba.c.fetchone() == None: + fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp) + if block_level == "reject": blockdict.append( { @@ -283,11 +67,9 @@ for blocker, software in c.fetchall(): "reason": None }) else: - c.execute( - "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?", - (timestamp, blocker, blocked, block_level) - ) - conn.commit() + fba.update_last_seen(timestamp, blocker, blocked, block_level) + + fba.conn.commit() # Reasons if "mrf_simple_info" in federation: for block_level, info in ( @@ -297,30 +79,33 @@ for blocker, software in c.fetchall(): else {})} ).items(): for blocked, reason in info.items(): - blocked = tidyup(blocked) + blocked = fba.tidyup(blocked) + if blocked == "": + print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level) continue + if blocked.count("*") > 1: # same domain guess as above, but for reasons field - c.execute( - "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),) + fba.c.execute( + "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),) ) - searchres = c.fetchone() + searchres = fba.c.fetchone() + if searchres != None: blocked = searchres[0] - c.execute( - "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''", - (reason["reason"], blocker, blocked, block_level), - ) + + fba.update_block_reason(reason["reason"], blocker, blocked, block_level) + for entry in blockdict: if entry["blocked"] == blocked: entry["reason"] = reason["reason"] - conn.commit() + fba.conn.commit() except Exception as e: - print("error:", e, blocker) + print("error:", e, blocker, software) elif software == "mastodon": - print(blocker) + print("DEBUG: blocker:", blocker) try: # json endpoint for newer mastodongs try: @@ -332,8 +117,8 @@ for blocker, software in c.fetchall(): } # handling CSRF, I've saw at least one server requiring it to access the endpoint - meta = BeautifulSoup( - get(f"https://{blocker}/about", headers=headers, timeout=5).text, + meta = bs4.BeautifulSoup( + reqto.get(f"https://{blocker}/about", headers=headers, timeout=5).text, "html.parser", ) try: @@ -342,11 +127,15 @@ for blocker, software in c.fetchall(): except: reqheaders = headers - blocks = get( + blocks = reqto.get( f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5 ).json() + + print("DEBUG: blocks():", len(blocks)) for block in blocks: entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']} + + print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) if block['severity'] == 'suspend': json['reject'].append(entry) elif block['severity'] == 'silence': @@ -355,48 +144,52 @@ for blocker, software in c.fetchall(): json['media_removal'].append(entry) elif block['severity'] == 'reject_reports': json['report_removal'].append(entry) + else: + print("WARNING: Unknown severity:", block['severity'], block['domain']) except: - json = get_mastodon_blocks(blocker) + json = fba.get_mastodon_blocks(blocker) for block_level, blocks in json.items(): for instance in blocks: blocked, blocked_hash, reason = instance.values() - blocked = tidyup(blocked) - if blocked.count("*") <= 1: - c.execute( - "select hash from instances where hash = ?", (blocked_hash,) + print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason) + + blocked = fba.tidyup(blocked) + print("DEBUG: blocked:", blocked) + + if blocked.count("*") < 1: + # No obsfucation for this instance + fba.c.execute( + "SELECT hash FROM instances WHERE domain = ? LIMIT 1", (blocked,) ) - if c.fetchone() == None: - c.execute( - "insert into instances select ?, ?, ?", - (blocked, get_hash(blocked), get_type(blocked)), - ) + + if fba.c.fetchone() == None: + print("DEBUG: Hash wasn't found, adding:", blocked) + fba.add_instance(blocked) else: # Doing the hash search for instance names as well to tidy up DB - c.execute( - "select domain from instances where hash = ?", (blocked_hash,) + fba.c.execute( + "SELECT domain FROM instances WHERE hash = ? LIMIT 1", (blocked_hash,) ) - searchres = c.fetchone() + searchres = fba.c.fetchone() + if searchres != None: + print("DEBUG: Updating domain: ", searchres[0]) blocked = searchres[0] - timestamp = int(time()) - c.execute( - "select * from blocks where blocker = ? and blocked = ? and block_level = ?", - (blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level), + timestamp = int(time.time()) + fba.c.execute( + "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?", + ( + blocker, + blocked if blocked.count("*") <= 1 else blocked_hash, + block_level + ), ) - if c.fetchone() == None: - c.execute( - "insert into blocks select ?, ?, ?, ?, ?, ?", - ( - blocker, - blocked if blocked.count("*") <= 1 else blocked_hash, - reason, - block_level, - timestamp, - timestamp, - ), - ) + + if fba.c.fetchone() == None: + fba.block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level, timestamp, timestamp) + if block_level == "reject": blockdict.append( { @@ -404,74 +197,62 @@ for blocker, software in c.fetchall(): "reason": reason }) else: - c.execute( - "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?", - (timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level), - ) + fba.update_last_seen(timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level) + if reason != '': - c.execute( - "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''", - (reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level), - ) - conn.commit() + fba.update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level) + + fba.conn.commit() except Exception as e: - print("error:", e, blocker) + print("error:", e, blocker, software) elif software == "friendica" or software == "misskey": - print(blocker) + print("DEBUG: blocker:", blocker) try: if software == "friendica": - json = get_friendica_blocks(blocker) + json = fba.get_friendica_blocks(blocker) elif software == "misskey": - json = get_pisskey_blocks(blocker) + json = fba.get_misskey_blocks(blocker) for block_level, blocks in json.items(): for instance in blocks: blocked, reason = instance.values() - blocked = tidyup(blocked) + blocked = fba.tidyup(blocked) + print("BEFORE-blocked:", blocked) if blocked.count("*") > 0: # Some friendica servers also obscure domains without hash - c.execute( - "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),) + fba.c.execute( + "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),) ) - searchres = c.fetchone() + searchres = fba.c.fetchone() if searchres != None: blocked = searchres[0] if blocked.count("?") > 0: # Some obscure them with question marks, not sure if that's dependent on version or not - c.execute( - "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("?", "_"),) + fba.c.execute( + "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("?", "_"),) ) - searchres = c.fetchone() + searchres = fba.c.fetchone() if searchres != None: blocked = searchres[0] - c.execute( - "select domain from instances where domain = ?", (blocked,) + print("AFTER-blocked:", blocked) + fba.c.execute( + "SELECT domain FROM instances WHERE domain = ?", (blocked,) ) - if c.fetchone() == None: - c.execute( - "insert into instances select ?, ?, ?", - (blocked, get_hash(blocked), get_type(blocked)), - ) - timestamp = int(time()) - c.execute( - "select * from blocks where blocker = ? and blocked = ?", + if fba.c.fetchone() == None: + print("DEBUG: Hash wasn't found, adding:", blocked) + fba.add_instance(blocked) + + timestamp = int(time.time()) + fba.c.execute( + "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?", (blocker, blocked), ) - if c.fetchone() == None: - c.execute( - "insert into blocks select ?, ?, ?, ?, ?, ?", - ( - blocker, - blocked, - reason, - block_level, - timestamp, - timestamp - ), - ) + if fba.c.fetchone() == None: + fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp) + if block_level == "reject": blockdict.append( { @@ -479,80 +260,83 @@ for blocker, software in c.fetchall(): "reason": reason }) else: - c.execute( - "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?", - (timestamp, blocker, blocked, block_level), - ) + fba.update_last_seen(timestamp, blocker, blocked, block_level) + if reason != '': - c.execute( - "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''", - (reason, blocker, blocked, block_level), - ) - conn.commit() + fba.update_block_reason(reason, blocker, blocked, block_level) + + fba.conn.commit() except Exception as e: - print("error:", e, blocker) + print("error:", e, blocker, software) elif software == "gotosocial": - print(blocker) + print("DEBUG: blocker:", blocker) try: # Blocks - federation = get( + federation = reqto.get( f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5 ).json() - for peer in federation: - blocked = peer["domain"].lower() - if blocked.count("*") > 0: - # GTS does not have hashes for obscured domains, so we have to guess it - c.execute( - "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),) - ) - searchres = c.fetchone() - if searchres != None: - blocked = searchres[0] + if (federation == None): + print("WARNING: No valid response:", blocker); + else: + for peer in federation: + blocked = peer["domain"].lower() - c.execute( - "select domain from instances where domain = ?", (blocked,) - ) - if c.fetchone() == None: - c.execute( - "insert into instances select ?, ?, ?", - (blocked, get_hash(blocked), get_type(blocked)), - ) - c.execute( - "select * from blocks where blocker = ? and blocked = ? and block_level = ?", - (blocker, blocked, "reject"), - ) - timestamp = int(time()) - if c.fetchone() == None: - c.execute( - "insert into blocks select ?, ?, ?, ?, ?, ?", - (blocker, blocked, "", "reject", timestamp, timestamp), - ) - blockdict.append( - { - "blocked": blocked, - "reason": None - }) - else: - c.execute( - "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?", - (timestamp, blocker, blocked, "reject"), + if blocked.count("*") > 0: + # GTS does not have hashes for obscured domains, so we have to guess it + fba.c.execute( + "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),) + ) + searchres = fba.c.fetchone() + + if searchres != None: + blocked = searchres[0] + + fba.c.execute( + "SELECT domain FROM instances WHERE domain = ?", (blocked,) ) - if "public_comment" in peer: - reason = peer["public_comment"] - c.execute( - "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''", - (reason, blocker, blocked, "reject"), + + if fba.c.fetchone() == None: + print("DEBUG: Hash wasn't found, adding:", blocked) + fba.add_instance(blocked) + + fba.c.execute( + "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?", + ( + blocker, + blocked, + "reject" + ), ) - for entry in blockdict: - if entry["blocked"] == blocked: - entry["reason"] = reason - conn.commit() + timestamp = int(time.time()) + + if fba.c.fetchone() == None: + fba.block_instance(blocker, blocked, "", "reject", timestamp, timestamp) + + blockdict.append( + { + "blocked": blocked, + "reason": None + }) + else: + fba.update_last_seen(timestamp, blocker, blocked, "reject") + + if "public_comment" in peer: + reason = peer["public_comment"] + fba.update_block_reason(reason, blocker, blocked, "reject") + + for entry in blockdict: + if entry["blocked"] == blocked: + entry["reason"] = reason + fba.conn.commit() except Exception as e: - print("error:", e, blocker) + print("error:", e, blocker, software) + else: + print("WARNING: Unknown software:", software) - if config["bot_enabled"] and len(blockdict) > 0: + if fba.config["bot_enabled"] and len(blockdict) > 0: send_bot_post(blocker, blockdict) + blockdict = [] -conn.close() +fba.conn.close()