3 from hashlib import sha256
5 from bs4 import BeautifulSoup
11 with open("config.json") as f:
12 config = loads(f.read())
15 "user-agent": config["useragent"]
19 def get_mastodon_blocks(domain: str) -> dict:
21 "Suspended servers": [],
23 "Limited servers": [],
24 "Silenced servers": [],
28 "Silenced instances": "Silenced servers",
29 "Suspended instances": "Suspended servers",
30 "Gesperrte Server": "Suspended servers",
31 "Gefilterte Medien": "Filtered media",
32 "Stummgeschaltete Server": "Silenced servers",
33 "停止済みのサーバー": "Suspended servers",
34 "メディアを拒否しているサーバー": "Filtered media",
35 "サイレンス済みのサーバー": "Silenced servers",
36 "שרתים מושעים": "Suspended servers",
37 "מדיה מסוננת": "Filtered media",
38 "שרתים מוגבלים": "Silenced servers",
39 "Serveurs suspendus": "Suspended servers",
40 "Médias filtrés": "Filtered media",
41 "Serveurs limités": "Silenced servers",
46 get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
52 for header in doc.find_all("h3"):
53 header_text = header.text
54 if header_text in translations:
55 header_text = translations[header_text]
56 if header_text in blocks:
57 # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
58 for line in header.find_all_next("table")[0].find_all("tr")[1:]:
59 blocks[header_text].append(
61 "domain": line.find("span").text,
62 "hash": line.find("span")["title"][9:],
63 "reason": line.find_all("td")[1].text.strip(),
67 "reject": blocks["Suspended servers"],
68 "media_removal": blocks["Filtered media"],
69 "followers_only": blocks["Limited servers"]
70 + blocks["Silenced servers"],
73 def get_friendica_blocks(domain: str) -> dict:
78 get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
84 blocklist = doc.find(id="about_blocklist")
85 for line in blocklist.find("table").find_all("tr")[1:]:
88 "domain": line.find_all("td")[0].text.strip(),
89 "reason": line.find_all("td")[1].text.strip()
97 def get_pisskey_blocks(domain: str) -> dict:
# iterating through all "suspended" (follow-only in its terminology) instances page-by-page, since the API doesn't support sending them all at once
110 doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"suspended":True,"limit":step}), headers=headers, timeout=5).json()
113 doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"suspended":True,"limit":step,"offset":counter-1}), headers=headers, timeout=5).json()
117 if instance["isSuspended"]:
118 blocks["suspended"].append(
120 "domain": instance["host"],
121 # no reason field, nothing
125 counter = counter + step
# same page-by-page iteration, this time for "blocked" instances (aka full suspend)
134 doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"blocked":True,"limit":step}), headers=headers, timeout=5).json()
137 doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"blocked":True,"limit":step,"offset":counter-1}), headers=headers, timeout=5).json()
140 if instance["isBlocked"]:
141 blocks["blocked"].append(
143 "domain": instance["host"],
147 counter = counter + step
153 "reject": blocks["blocked"],
154 "followers_only": blocks["suspended"]
def get_hash(domain: str) -> str:
    """Return the hex-encoded SHA-256 digest of *domain*.

    The domain is encoded as UTF-8 before hashing.
    """
    hasher = sha256()
    hasher.update(domain.encode("utf-8"))
    return hasher.hexdigest()
164 def get_type(domain: str) -> str:
166 res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
167 if res.status_code == 404:
168 res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
169 if res.status_code == 404:
170 res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
171 if res.ok and "text/html" in res.headers["content-type"]:
172 res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)
174 if res.json()["software"]["name"] in ["akkoma", "rebased"]:
176 elif res.json()["software"]["name"] in ["hometown", "ecko"]:
178 elif res.json()["software"]["name"] in ["calckey", "groundpolis", "foundkey", "cherrypick"]:
181 return res.json()["software"]["name"]
182 elif res.status_code == 404:
183 res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
def tidyup(domain: str) -> str:
    """Normalize a raw blocklist entry down to a bare, lowercase host name.

    Blocklist entries are free-form text, so the decorations that commonly
    surround the host are removed in this order:

    1. mixed/upper case is folded to lower case
    2. a leading ``http:``/``https:`` scheme, with any number of slashes
    3. a single trailing slash
    4. a trailing ``:port``
    5. a leading ``@``
    6. a ``user@`` prefix (entries that target an individual account)

    The port is stripped *after* the trailing slash so that entries such as
    ``https://example.com:8443/`` are reduced to ``example.com`` as well.

    Args:
        domain: Entry exactly as published by the blocking instance.

    Returns:
        The cleaned-up host name. Entries without any of the decorations
        above (including wildcard-obscured ones) come back lowercased but
        otherwise unchanged.
    """
    # some instances publish their blocks in variable case
    domain = domain.lower()
    # some include the scheme, sometimes even without slashes
    domain = re.sub(r"^https?:/*", "", domain)
    # ... and a trailing slash
    domain = re.sub(r"/$", "", domain)
    # others append the port
    domain = re.sub(r":\d+$", "", domain)
    # ... or prefix the host with an @
    domain = re.sub(r"^@", "", domain)
    # and some try to block individual users; keep only the host part
    domain = re.sub(r".+@", "", domain)
    return domain
204 conn = sqlite3.connect("blocks.db")
208 "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
211 for blocker, software in c.fetchall():
212 blocker = tidyup(blocker)
213 if software == "pleroma":
218 f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
219 ).json()["metadata"]["federation"]
220 if "mrf_simple" in federation:
221 for block_level, blocks in (
222 {**federation["mrf_simple"],
223 **{"quarantined_instances": federation["quarantined_instances"]}}
225 for blocked in blocks:
226 blocked = tidyup(blocked)
229 if blocked.count("*") > 1:
# Akkoma also started obscuring domains without hash
232 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),)
234 searchres = c.fetchone()
235 if searchres != None:
236 blocked = searchres[0]
239 "select domain from instances where domain = ?", (blocked,)
241 if c.fetchone() == None:
243 "insert into instances select ?, ?, ?",
244 (blocked, get_hash(blocked), get_type(blocked)),
246 timestamp = int(time())
248 "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
249 (blocker, blocked, block_level),
251 if c.fetchone() == None:
253 "insert into blocks select ?, ?, '', ?, ?, ?",
254 (blocker, blocked, block_level, timestamp, timestamp),
258 "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
259 (timestamp, blocker, blocked, block_level)
263 if "mrf_simple_info" in federation:
264 for block_level, info in (
265 {**federation["mrf_simple_info"],
266 **(federation["quarantined_instances_info"]
267 if "quarantined_instances_info" in federation
270 for blocked, reason in info.items():
271 blocked = tidyup(blocked)
274 if blocked.count("*") > 1:
275 # same domain guess as above, but for reasons field
277 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),)
279 searchres = c.fetchone()
280 if searchres != None:
281 blocked = searchres[0]
283 "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
284 (reason["reason"], blocker, blocked, block_level),
287 except Exception as e:
288 print("error:", e, blocker)
289 elif software == "mastodon":
# json endpoint for newer Mastodon versions
297 "followers_only": [],
# handling CSRF, I've seen at least one server requiring it to access the endpoint
302 meta = BeautifulSoup(
303 get(f"https://{blocker}/about", headers=headers, timeout=5).text,
307 csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
308 reqheaders = {**headers, **{"x-csrf-token": csrf}}
313 f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
316 entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
317 if block['severity'] == 'suspend':
318 json['reject'].append(entry)
319 elif block['severity'] == 'silence':
320 json['followers_only'].append(entry)
321 elif block['severity'] == 'reject_media':
322 json['media_removal'].append(entry)
323 elif block['severity'] == 'reject_reports':
324 json['report_removal'].append(entry)
326 json = get_mastodon_blocks(blocker)
328 for block_level, blocks in json.items():
329 for instance in blocks:
330 blocked, blocked_hash, reason = instance.values()
331 blocked = tidyup(blocked)
332 if blocked.count("*") <= 1:
334 "select hash from instances where hash = ?", (blocked_hash,)
336 if c.fetchone() == None:
338 "insert into instances select ?, ?, ?",
339 (blocked, get_hash(blocked), get_type(blocked)),
342 # Doing the hash search for instance names as well to tidy up DB
344 "select domain from instances where hash = ?", (blocked_hash,)
346 searchres = c.fetchone()
347 if searchres != None:
348 blocked = searchres[0]
350 timestamp = int(time())
352 "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
353 (blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
355 if c.fetchone() == None:
357 "insert into blocks select ?, ?, ?, ?, ?, ?",
360 blocked if blocked.count("*") <= 1 else blocked_hash,
369 "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
370 (timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
374 "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
375 (reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
378 except Exception as e:
379 print("error:", e, blocker)
380 elif software == "friendica" or software == "misskey":
383 if software == "friendica":
384 json = get_friendica_blocks(blocker)
385 elif software == "misskey":
386 json = get_pisskey_blocks(blocker)
387 for block_level, blocks in json.items():
388 for instance in blocks:
389 blocked, reason = instance.values()
390 blocked = tidyup(blocked)
392 if blocked.count("*") > 0:
393 # Some friendica servers also obscure domains without hash
395 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),)
397 searchres = c.fetchone()
398 if searchres != None:
399 blocked = searchres[0]
401 if blocked.count("?") > 0:
402 # Some obscure them with question marks, not sure if that's dependent on version or not
404 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("?", "_"),)
406 searchres = c.fetchone()
407 if searchres != None:
408 blocked = searchres[0]
411 "select domain from instances where domain = ?", (blocked,)
413 if c.fetchone() == None:
415 "insert into instances select ?, ?, ?",
416 (blocked, get_hash(blocked), get_type(blocked)),
419 timestamp = int(time())
421 "select * from blocks where blocker = ? and blocked = ? and reason = ?",
422 (blocker, blocked, reason),
424 if c.fetchone() == None:
426 "insert into blocks select ?, ?, ?, ?, ?, ?",
438 "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
439 (timestamp, blocker, blocked, block_level),
443 "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
444 (reason, blocker, blocked, block_level),
447 except Exception as e:
448 print("error:", e, blocker)
449 elif software == "gotosocial":
454 f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
456 for peer in federation:
457 blocked = peer["domain"].lower()
459 if blocked.count("*") > 0:
460 # GTS does not have hashes for obscured domains, so we have to guess it
462 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),)
464 searchres = c.fetchone()
465 if searchres != None:
466 blocked = searchres[0]
469 "select domain from instances where domain = ?", (blocked,)
471 if c.fetchone() == None:
473 "insert into instances select ?, ?, ?",
474 (blocked, get_hash(blocked), get_type(blocked)),
477 "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
478 (blocker, blocked, "reject"),
480 timestamp = int(time())
481 if c.fetchone() == None:
483 "insert into blocks select ?, ?, ?, ?, ?, ?",
484 (blocker, blocked, "", "reject", timestamp, timestamp),
488 "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
489 (timestamp, blocker, blocked, "reject"),
491 if "public_comment" in peer:
492 reason = peer["public_comment"]
494 "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
495 (reason, blocker, blocked, "reject"),
498 except Exception as e:
499 print("error:", e, blocker)