"""Fetch the domain blocks published by known instances and store them.

For every row of the ``instances`` table whose software is pleroma,
mastodon, friendica, misskey or gotosocial, the instance's block list is
fetched, each blocked domain is registered in ``instances`` if missing, and
the block is inserted into (or refreshed in) the ``blocks`` table through the
``fba`` helpers.  Newly seen ``reject`` blocks are collected per blocker and
handed to ``send_bot_post`` when ``fba.config["bot_enabled"]`` is set.

The script runs at module level, using the shared ``fba.c`` cursor and
``fba.conn`` connection, and closes the connection when it is done.
"""
import time
import bs4
import fba
import itertools
import re

# NOTE(review): `reqto`, `headers` and `send_bot_post` are used below but are
# neither imported nor defined in this file as shown.  They have to be brought
# into scope (presumably from `fba` and a `reqto` module - confirm), otherwise
# every HTTP request raises NameError, which the per-blocker handler merely
# prints, and the bot post raises NameError uncaught.

_LIKE_LOOKUP = "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1"
_BLOCK_LOOKUP = "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?"

# Mastodon API severity -> block level name used in the `blocks` table.
_MASTODON_SEVERITY_LEVELS = {
    "suspend": "reject",
    "silence": "followers_only",
    "reject_media": "media_removal",
    "reject_reports": "report_removal",
}


def _guess_obscured_domain(blocked: str, wildcard: str = "*") -> str:
    """Return the first known domain matching an obscured name.

    Every ``wildcard`` character is turned into the single-character SQL
    ``LIKE`` placeholder ``_``.  When no known instance matches, ``blocked``
    is returned unchanged.
    """
    fba.c.execute(_LIKE_LOOKUP, (blocked.replace(wildcard, "_"),))
    row = fba.c.fetchone()
    return blocked if row is None else row[0]


def _ensure_instance(blocked: str) -> None:
    """Register ``blocked`` in the ``instances`` table if it is not there yet."""
    # The parameters must be a one-element tuple: a bare string would be
    # treated as a sequence of single-character bindings.
    fba.c.execute("SELECT domain FROM instances WHERE domain = ?", (blocked,))
    if fba.c.fetchone() is None:
        print("DEBUG: Hash wasn't found, adding:", blocked)
        fba.add_instance(blocked)


def _record_block(blocker: str, blocked: str, reason, block_level: str) -> bool:
    """Insert a block or refresh its last-seen time.

    Returns True when the (blocker, blocked, block_level) row did not exist
    and was inserted, False when an existing row was refreshed.
    """
    timestamp = int(time.time())
    fba.c.execute(_BLOCK_LOOKUP, (blocker, blocked, block_level))
    if fba.c.fetchone() is None:
        fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
        return True

    fba.update_last_seen(timestamp, blocker, blocked, block_level)
    return False


def _fetch_pleroma(blocker: str, blockdict: list) -> None:
    """Store the blocks a Pleroma instance publishes in its nodeinfo 2.1."""
    federation = reqto.get(
        f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
    ).json()["metadata"]["federation"]

    # Blocks
    if "mrf_simple" in federation:
        levels = {
            **federation["mrf_simple"],
            # Guarded like quarantined_instances_info below: a missing key
            # must not abort the whole blocker.
            "quarantined_instances": federation.get("quarantined_instances") or [],
        }
        for block_level, blocks in levels.items():
            for blocked in blocks:
                blocked = fba.tidyup(blocked)

                if blocked == "":
                    print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                    continue

                if blocked.count("*") > 1:
                    # -ACK!-oma also started obscuring domains without hash
                    blocked = _guess_obscured_domain(blocked)

                _ensure_instance(blocked)

                # No reason is known at this point; it is filled in from
                # mrf_simple_info in the "Reasons" pass below.
                is_new = _record_block(blocker, blocked, "", block_level)
                if is_new and block_level == "reject":
                    blockdict.append({"blocked": blocked, "reason": None})

    fba.conn.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        infos = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }
        for block_level, info in infos.items():
            for blocked, reason in info.items():
                blocked = fba.tidyup(blocked)

                if blocked == "":
                    print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                    continue

                if blocked.count("*") > 1:
                    # Same domain guess as above, but for the reasons field
                    blocked = _guess_obscured_domain(blocked)

                fba.update_block_reason(reason["reason"], blocker, blocked, block_level)

                for entry in blockdict:
                    if entry["blocked"] == blocked:
                        entry["reason"] = reason["reason"]

    fba.conn.commit()


def _fetch_mastodon_api_blocks(blocker: str) -> dict:
    """Read /api/v1/instance/domain_blocks and group the entries by level."""
    levels = {
        "reject": [],
        "media_removal": [],
        "followers_only": [],
        "report_removal": [],
    }

    # Handling CSRF: at least one server was seen requiring the token to
    # access the endpoint.
    meta = bs4.BeautifulSoup(
        reqto.get(f"https://{blocker}/about", headers=headers, timeout=5).text,
        "html.parser",
    )
    try:
        csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
        reqheaders = {**headers, "x-csrf-token": csrf}
    except (TypeError, KeyError):
        # No csrf-token meta tag (find() returned None) or no content attribute.
        reqheaders = headers

    blocks = reqto.get(
        f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
    ).json()

    print("DEBUG: blocks():", len(blocks))
    for block in blocks:
        entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}

        print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
        block_level = _MASTODON_SEVERITY_LEVELS.get(block['severity'])
        if block_level is None:
            print("WARNING: Unknown severity:", block['severity'], block['domain'])
        else:
            levels[block_level].append(entry)

    return levels


def _fetch_mastodon(blocker: str, blockdict: list) -> None:
    """Store the blocks of a Mastodon instance."""
    try:
        # JSON endpoint of newer Mastodon versions
        levels = _fetch_mastodon_api_blocks(blocker)
    except Exception:
        # Any failure of the JSON endpoint falls back to the fba helper.
        levels = fba.get_mastodon_blocks(blocker)

    for block_level, blocks in levels.items():
        for instance in blocks:
            # Relies on the entry's key order: domain, hash, reason.
            blocked, blocked_hash, reason = instance.values()
            print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)

            blocked = fba.tidyup(blocked)
            print("DEBUG: blocked:", blocked)

            if blocked.count("*") < 1:
                # No obfuscation for this instance
                _ensure_instance(blocked)
            else:
                # Doing the hash search for instance names as well to tidy up DB
                fba.c.execute(
                    "SELECT domain FROM instances WHERE hash = ? LIMIT 1", (blocked_hash,)
                )
                searchres = fba.c.fetchone()

                if searchres is not None:
                    print("DEBUG: Updating domain: ", searchres[0])
                    blocked = searchres[0]

            # A name that is still obscured (more than one "*") is stored by
            # its hash instead of the unusable domain.
            # NOTE(review): the threshold here (<= 1) differs from the
            # obfuscation test above (< 1); kept as in the original - confirm.
            stored = blocked if blocked.count("*") <= 1 else blocked_hash

            is_new = _record_block(blocker, stored, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append({"blocked": blocked, "reason": reason})

            if reason != '':
                fba.update_block_reason(reason, blocker, stored, block_level)

    fba.conn.commit()


def _fetch_listed_blocks(blocker: str, blockdict: list, fetch_blocks) -> None:
    """Store the blocks returned by ``fetch_blocks(blocker)``.

    ``fetch_blocks`` is one of the fba getters; its result is iterated as a
    mapping of block level to entries holding a domain and a reason, in that
    order.
    """
    levels = fetch_blocks(blocker)

    for block_level, blocks in levels.items():
        for instance in blocks:
            blocked, reason = instance.values()
            blocked = fba.tidyup(blocked)

            print("BEFORE-blocked:", blocked)
            if blocked.count("*") > 0:
                # Some friendica servers also obscure domains without hash
                blocked = _guess_obscured_domain(blocked, "*")

            if blocked.count("?") > 0:
                # Some obscure them with question marks, not sure if that's
                # dependent on version or not
                blocked = _guess_obscured_domain(blocked, "?")

            print("AFTER-blocked:", blocked)
            _ensure_instance(blocked)

            # The existence check includes block_level (as for the other
            # software types) so a domain listed under two levels gets a row
            # for each instead of only the first one.
            is_new = _record_block(blocker, blocked, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append({"blocked": blocked, "reason": reason})

            if reason != '':
                fba.update_block_reason(reason, blocker, blocked, block_level)

    fba.conn.commit()


def _fetch_friendica(blocker: str, blockdict: list) -> None:
    """Store the blocks of a Friendica instance."""
    _fetch_listed_blocks(blocker, blockdict, fba.get_friendica_blocks)


def _fetch_misskey(blocker: str, blockdict: list) -> None:
    """Store the blocks of a Misskey instance."""
    _fetch_listed_blocks(blocker, blockdict, fba.get_misskey_blocks)


def _fetch_gotosocial(blocker: str, blockdict: list) -> None:
    """Store the suspended peers a GoToSocial instance reports as "reject"."""
    # Blocks
    federation = reqto.get(
        f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
    ).json()

    if federation is None:
        print("WARNING: No valid response:", blocker)
    else:
        for peer in federation:
            blocked = peer["domain"].lower()

            if blocked.count("*") > 0:
                # GTS does not have hashes for obscured domains, so we have
                # to guess it
                blocked = _guess_obscured_domain(blocked)

            _ensure_instance(blocked)

            if _record_block(blocker, blocked, "", "reject"):
                blockdict.append({"blocked": blocked, "reason": None})

            if "public_comment" in peer:
                reason = peer["public_comment"]
                fba.update_block_reason(reason, blocker, blocked, "reject")

                for entry in blockdict:
                    if entry["blocked"] == blocked:
                        entry["reason"] = reason

    fba.conn.commit()


# Software name -> function(blocker, blockdict) that fetches and stores blocks.
_HANDLERS = {
    "pleroma": _fetch_pleroma,
    "mastodon": _fetch_mastodon,
    "friendica": _fetch_friendica,
    "misskey": _fetch_misskey,
    "gotosocial": _fetch_gotosocial,
}

fba.c.execute(
    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
)

for blocker, software in fba.c.fetchall():
    print("DEBUG: blocker,software:", blocker, software)
    blocker = fba.tidyup(blocker)

    handler = _HANDLERS.get(software)
    if handler is None:
        print("WARNING: Unknown software:", software)
        continue

    print("DEBUG: blocker:", blocker)

    # Newly inserted "reject" blocks of this blocker, for the bot post.
    blockdict = []
    try:
        handler(blocker, blockdict)
    except Exception as e:
        # Best effort: one unreachable or malformed instance must not stop
        # the run; entries collected before the failure are still posted.
        print("error:", e, blocker, software)

    if fba.config["bot_enabled"] and blockdict:
        send_bot_post(blocker, blockdict)

fba.conn.close()