-from requests import get
-from json import loads
-from hashlib import sha256
-import sqlite3
-from bs4 import BeautifulSoup
-
-
-def get_mastodon_blocks(domain: str) -> dict:
- try:
- reject = []
- media_removal = []
- federated_timeline_removal = []
-
- doc = BeautifulSoup(get(f"https://{domain}/about/more").text, "html.parser")
- for header in doc.find_all("h3"):
- if header.text == "Suspended servers":
- for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
- reject.append({"domain": line.find("span").text, "hash": line.find("span")["title"][9:], "reason": line.find_all("td")[1].text.strip()})
- elif header.text == "Filtered media":
- for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
- media_removal.append({"domain": line.find("span").text, "hash": line.find("span")["title"][9:], "reason": line.find_all("td")[1].text.strip()})
- elif header.text in ["Limited servers", "Silenced servers"]:
- for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
- federated_timeline_removal.append({"domain": line.find("span").text, "hash": line.find("span")["title"][9:], "reason": line.find_all("td")[1].text.strip()})
- finally:
- return {"reject": reject, "media_removal": media_removal, "federated_timeline_removal": federated_timeline_removal}
-
-
-def get_type(domain: str) -> str:
- try:
- res = get("https://"+domain, timeout=5)
- if "pleroma" in res.text.lower():
- print("pleroma")
- return "pleroma"
- elif "mastodon" in res.text.lower():
- print("mastodon")
- return "mastodon"
- return ""
- except Exception as e:
- print("error:", e, domain)
- return ""
-
-conn = sqlite3.connect("blocks.db")
-c = conn.cursor()
-
-c.execute("select domain, software from instances where software in ('pleroma', 'mastodon')")
-
-for instance in c.fetchall():
- if instance[1] == "pleroma":
- blocker = instance[0]
- print(blocker)
+import reqto
+import time
+import bs4
+import fba
+import itertools
+import re
+
+fba.c.execute(
+ "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
+)
+
+for blocker, software in fba.c.fetchall():
+ print("DEBUG: blocker,software:", blocker, software)
+ blockdict = []
+ blocker = fba.tidyup(blocker)
+ if software == "pleroma":
+ print("INFO: blocker:", blocker)
try:
# Blocks
- c.execute("delete from blocks where blocker = ?", (blocker,))
- json = loads(get(f"https://{blocker}/nodeinfo/2.1.json").text)
- if "mrf_simple" in json["metadata"]["federation"]:
- for mrf in json["metadata"]["federation"]["mrf_simple"]:
- for blocked in json["metadata"]["federation"]["mrf_simple"][mrf]:
+ federation = reqto.get(
+ f"https://{blocker}/nodeinfo/2.1.json", headers=fba.headers, timeout=5
+ ).json()["metadata"]["federation"]
+ if "mrf_simple" in federation:
+ # Merge the quarantined instances into the mrf_simple block levels so
+ # both are handled by the same loop. "quarantined_instances" is optional
+ # in the nodeinfo metadata, so fall back to an empty list: indexing the
+ # key directly raised KeyError and skipped every block of this blocker.
+ for block_level, blocks in (
+ {**federation["mrf_simple"],
+ **{"quarantined_instances": federation.get("quarantined_instances", [])}}
+ ).items():
+ for blocked in blocks:
+ print("DEBUG: BEFORE blocked:", blocked)
+ blocked = fba.tidyup(blocked)
+ print("DEBUG: AFTER blocked:", blocked)
+
if blocked == "":
+ print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
continue
- c.execute("select domain from instances where domain = ?", (blocked,))
- if c.fetchone() == None:
- c.execute("insert into instances select ?, ?, ?", (blocked, sha256(bytes(blocked, "utf-8")).hexdigest(), get_type(blocked)))
- c.execute("insert into blocks select ?, ?, '', ?", (blocker, blocked, mrf))
- # Quarantined Instances
- if "quarantined_instances" in json["metadata"]["federation"]:
- for blocked in json["metadata"]["federation"]["quarantined_instances"]:
- if blocked == "":
- continue
- c.execute("select domain from instances where domain = ?", (blocked,))
- if c.fetchone() == None:
- c.execute("insert into instances select ?, ?, ?", (blocked, sha256(bytes(blocked, "utf-8")).hexdigest(), get_type(blocked)))
- c.execute("insert into blocks select ?, ?, '', 'quarantined_instances'", (blocker, blocked))
- conn.commit()
+
+ if blocked.count("*") > 1:
+ # -ACK!-oma also started obscuring domains without hash
+ fba.c.execute(
+ "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+ )
+ searchres = fba.c.fetchone()
+ print("DEBUG: searchres[]:", type(searchres))
+ if searchres != None:
+ blocked = searchres[0]
+
+ print("DEBUG: Looking up instance by domain:", blocked)
+ fba.c.execute(
+ "SELECT domain FROM instances WHERE domain = ?", (blocked,)
+ )
+
+ if fba.c.fetchone() == None:
+ print("DEBUG: Domain wasn't found, adding:", blocked)
+ fba.add_instance(blocked)
+
+ timestamp = int(time.time())
+ fba.c.execute(
+ "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+ (
+ blocker,
+ blocked,
+ block_level
+ ),
+ )
+
+ if fba.c.fetchone() == None:
+ fba.block_instance(blocker, blocked, "unknown", block_level, timestamp, timestamp)
+
+ if block_level == "reject":
+ blockdict.append(
+ {
+ "blocked": blocked,
+ "reason": None
+ })
+ else:
+ fba.update_last_seen(timestamp, blocker, blocked, block_level)
+
+ fba.conn.commit()
+
# Reasons
- if "mrf_simple_info" in json["metadata"]["federation"]:
- for mrf in json["metadata"]["federation"]["mrf_simple_info"]:
- for blocked in json["metadata"]["federation"]["mrf_simple_info"][mrf]:
- c.execute("update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ?", (json["metadata"]["federation"]["mrf_simple_info"][mrf][blocked]["reason"], blocker, blocked, mrf))
- if "quarantined_instances_info" in json["metadata"]["federation"]:
- for blocked in json["metadata"]["federation"]["quarantined_instances_info"]["quarantined_instances"]:
- c.execute("update blocks set reason = ? where blocker = ? and blocked = ? and block_level = 'quarantined_instances'", (json["metadata"]["federation"]["quarantined_instances_info"]["quarantined_instances"][blocked]["reason"], blocker, blocked))
- conn.commit()
+ if "mrf_simple_info" in federation:
+ print("DEBUG: Found mrf_simple_info:", blocker)
+ for block_level, info in (
+ {**federation["mrf_simple_info"],
+ **(federation["quarantined_instances_info"]
+ if "quarantined_instances_info" in federation
+ else {})}
+ ).items():
+ print("DEBUG: block_level, info.items():", block_level, len(info.items()))
+ for blocked, reason in info.items():
+ print("DEBUG: BEFORE blocked:", blocked)
+ blocked = fba.tidyup(blocked)
+ print("DEBUG: AFTER blocked:", blocked)
+
+ if blocked == "":
+ print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
+ continue
+
+ if blocked.count("*") > 1:
+ # same domain guess as above, but for reasons field
+ fba.c.execute(
+ "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+ )
+ searchres = fba.c.fetchone()
+
+ if searchres != None:
+ blocked = searchres[0]
+
+ print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
+ fba.update_block_reason(reason["reason"], blocker, blocked, block_level)
+
+ for entry in blockdict:
+ if entry["blocked"] == blocked:
+ print("DEBUG: Updating entry reason:", blocked)
+ entry["reason"] = reason["reason"]
+
+ fba.conn.commit()
except Exception as e:
- print("error:", e, blocker)
- elif instance[1] == "mastodon":
- blocker = instance[0]
- print(blocker)
+ print("error:", e, blocker, software)
+ elif software == "mastodon":
+ print("INFO: blocker:", blocker)
try:
- c.execute("delete from blocks where blocker = ?", (blocker,))
- json = get_mastodon_blocks(blocker)
- for block_level in ["reject", "media_removal", "federated_timeline_removal"]:
- for blocked in json[block_level]:
- if blocked["domain"].count("*") > 1:
- c.execute("insert into blocks select ?, ifnull((select domain from instances where hash = ?), ?), ?, ?", (blocker, blocked["hash"], blocked["hash"], blocked['reason'], block_level))
+ # JSON endpoint available on newer Mastodon versions; on any failure we
+ # fall back to fba.get_mastodon_blocks() below.
+ try:
+ json = {
+ "reject": [],
+ "media_removal": [],
+ "followers_only": [],
+ "report_removal": []
+ }
+
+ # Handle CSRF: at least one server has been seen requiring the token to access the endpoint
+ print("DEBUG: Fetching meta:", blocker)
+ meta = bs4.BeautifulSoup(
+ reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=5).text,
+ "html.parser",
+ )
+ try:
+ csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
+ print("DEBUG: Adding CSRF token:", blocker, csrf)
+ reqheaders = {**fba.headers, **{"x-csrf-token": csrf}}
+ except (TypeError, KeyError):
+ # TypeError: no csrf-token <meta> tag (find() returned None);
+ # KeyError: the tag exists but has no "content" attribute.
+ print("DEBUG: No CSRF token found, using normal headers:", blocker)
+ reqheaders = fba.headers
+
+ print("DEBUG: Quering API domain_blocks:", blocker)
+ blocks = reqto.get(
+ f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
+ ).json()
+
+ print("DEBUG: blocks():", len(blocks))
+ for block in blocks:
+ entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
+
+ print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
+ # Map the API severity onto the block_level names used for storage
+ if block['severity'] == 'suspend':
+ json['reject'].append(entry)
+ elif block['severity'] == 'silence':
+ json['followers_only'].append(entry)
+ elif block['severity'] == 'reject_media':
+ json['media_removal'].append(entry)
+ elif block['severity'] == 'reject_reports':
+ json['report_removal'].append(entry)
+ else:
+ print("WARNING: Unknown severity:", block['severity'], block['domain'])
+ except Exception:
+ # Catch Exception rather than using a bare except so that
+ # KeyboardInterrupt/SystemExit still stop the script.
+ print("DEBUG: Failed, Trying mastodon-specific fetches:", blocker)
+ json = fba.get_mastodon_blocks(blocker)
+
+ print("DEBUG: json.items():", blocker, len(json.items()))
+ for block_level, blocks in json.items():
+ print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
+ for instance in blocks:
+ blocked, blocked_hash, reason = instance.values()
+ print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)
+
+ blocked = fba.tidyup(blocked)
+ print("DEBUG: blocked:", blocked)
+
+ if blocked.count("*") < 1:
+ # No obfuscation for this instance
+ fba.c.execute(
+ "SELECT hash FROM instances WHERE domain = ? LIMIT 1", (blocked,)
+ )
+
+ if fba.c.fetchone() == None:
+ print("DEBUG: Hash wasn't found, adding:", blocked)
+ fba.add_instance(blocked)
+ else:
+ # Doing the hash search for instance names as well to tidy up DB
+ fba.c.execute(
+ "SELECT domain FROM instances WHERE hash = ? LIMIT 1", (blocked_hash,)
+ )
+ searchres = fba.c.fetchone()
+
+ if searchres != None:
+ print("DEBUG: Updating domain: ", searchres[0])
+ blocked = searchres[0]
+
+ timestamp = int(time.time())
+ fba.c.execute(
+ "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+ (
+ blocker,
+ blocked if blocked.count("*") <= 1 else blocked_hash,
+ block_level
+ ),
+ )
+
+ if fba.c.fetchone() == None:
+ fba.block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level, timestamp, timestamp)
+
+ if block_level == "reject":
+ blockdict.append(
+ {
+ "blocked": blocked,
+ "reason": reason
+ })
else:
- c.execute("select domain from instances where domain = ?", (blocked["domain"],))
- if c.fetchone() == None:
- c.execute("insert into instances select ?, ?, ?", (blocked["domain"], sha256(bytes(blocked["domain"], "utf-8")).hexdigest(), get_type(blocked["domain"])))
- c.execute("insert into blocks select ?, ?, ?, ?", (blocker, blocked["domain"], blocked["reason"], block_level))
- conn.commit()
+ fba.update_last_seen(timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
+
+ if reason != '':
+ print("DEBUG: Updating block reason:", blocker, blocked, reason)
+ fba.update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
+
+ fba.conn.commit()
+ except Exception as e:
+ print("error:", e, blocker, software)
+ elif software == "friendica" or software == "misskey":
+ print("INFO: blocker:", blocker)
+ try:
+ if software == "friendica":
+ json = fba.get_friendica_blocks(blocker)
+ elif software == "misskey":
+ json = fba.get_misskey_blocks(blocker)
+ for block_level, blocks in json.items():
+ for instance in blocks:
+ blocked, reason = instance.values()
+ blocked = fba.tidyup(blocked)
+
+ print("BEFORE-blocked:", blocked)
+ if blocked.count("*") > 0:
+ # Some friendica servers also obscure domains without hash
+ fba.c.execute(
+ "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+ )
+ searchres = fba.c.fetchone()
+ if searchres != None:
+ blocked = searchres[0]
+
+ if blocked.count("?") > 0:
+ # Some obscure them with question marks, not sure if that's dependent on version or not
+ fba.c.execute(
+ "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("?", "_"),)
+ )
+ searchres = fba.c.fetchone()
+ if searchres != None:
+ blocked = searchres[0]
+
+ print("AFTER-blocked:", blocked)
+ fba.c.execute(
+ "SELECT domain FROM instances WHERE domain = ?", (blocked,)
+ )
+
+ if fba.c.fetchone() == None:
+ print("DEBUG: Hash wasn't found, adding:", blocked)
+ fba.add_instance(blocked)
+
+ timestamp = int(time.time())
+ # Look the block up per block_level, like the other software branches do;
+ # without it a domain listed at a second level was treated as already
+ # recorded and only update_last_seen() was called for it.
+ fba.c.execute(
+ "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+ (blocker, blocked, block_level),
+ )
+ if fba.c.fetchone() == None:
+ fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
+
+ if block_level == "reject":
+ blockdict.append(
+ {
+ "blocked": blocked,
+ "reason": reason
+ })
+ else:
+ fba.update_last_seen(timestamp, blocker, blocked, block_level)
+
+ if reason != '':
+ print("DEBUG: Updating block reason:", blocker, blocked, reason)
+ fba.update_block_reason(reason, blocker, blocked, block_level)
+
+ fba.conn.commit()
+ except Exception as e:
+ print("error:", e, blocker, software)
+ elif software == "gotosocial":
+ print("INFO: blocker:", blocker)
+ try:
+ # Blocks
+ federation = reqto.get(
+ f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=5
+ ).json()
+
+ if (federation == None):
+ print("WARNING: No valid response:", blocker);
+ else:
+ for peer in federation:
+ print("DEBUG: peer(),[]:", len(peer), type(peer))
+ if (isinstance(peer, str) and peer == "error"):
+ print("WARNING: Cannot continue, maybe authentication required?", blocker)
+ break
+
+ blocked = peer["domain"].lower()
+ print("DEBUG: blocked:", blocked)
+
+ if blocked.count("*") > 0:
+ # GTS does not have hashes for obscured domains, so we have to guess it
+ fba.c.execute(
+ "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+ )
+ searchres = fba.c.fetchone()
+
+ if searchres != None:
+ blocked = searchres[0]
+
+ fba.c.execute(
+ "SELECT domain FROM instances WHERE domain = ?", (blocked,)
+ )
+
+ if fba.c.fetchone() == None:
+ print("DEBUG: Hash wasn't found, adding:", blocked)
+ fba.add_instance(blocked)
+
+ fba.c.execute(
+ "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+ (
+ blocker,
+ blocked,
+ "reject"
+ ),
+ )
+ timestamp = int(time.time())
+
+ if fba.c.fetchone() == None:
+ fba.block_instance(blocker, blocked, "", "reject", timestamp, timestamp)
+
+ blockdict.append(
+ {
+ "blocked": blocked,
+ "reason": None
+ })
+ else:
+ fba.update_last_seen(timestamp, blocker, blocked, "reject")
+
+ if "public_comment" in peer:
+ reason = peer["public_comment"]
+ print("DEBUG: Updating block reason:", blocker, blocked, reason)
+ fba.update_block_reason(reason, blocker, blocked, "reject")
+
+ for entry in blockdict:
+ if entry["blocked"] == blocked:
+ entry["reason"] = reason
+
+ fba.conn.commit()
except Exception as e:
- print("error:", e, blocker)
-conn.close()
+ print("error:", e, blocker, software)
+ else:
+ print("WARNING: Unknown software:", software)
+
+ if fba.config["bot_enabled"] and len(blockdict) > 0:
+ send_bot_post(blocker, blockdict)
+
+ blockdict = []
+
+fba.conn.close()