1 from requests import get
2 from requests import post
3 from hashlib import sha256
5 from bs4 import BeautifulSoup
# User-Agent value for outgoing HTTP requests; the get()/post() calls in this
# file pass `headers=headers`.
# NOTE(review): the line that opens the enclosing mapping is not visible in
# this listing - confirm against the complete file.
11 "user-agent": "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0"
# Scrape the "/about/more" HTML page of a Mastodon instance and collect the
# domain blocks it publishes, grouped by section heading.
# NOTE(review): this listing has lost its indentation and some lines (for
# example the openers of the dict literals below and the assignment of
# `doc`); confirm the notes here against the complete file.
15 def get_mastodon_blocks(domain: str) -> dict:
# Buckets of collected entries, keyed by the canonical English heading.
17 "Suspended servers": [],
19 "Limited servers": [],
20 "Silenced servers": [],
# Alternative English and localized (German, Japanese, Hebrew, French)
# headings, each mapped onto a canonical heading name.
24 "Silenced instances": "Silenced servers",
25 "Suspended instances": "Suspended servers",
26 "Gesperrte Server": "Suspended servers",
27 "Gefilterte Medien": "Filtered media",
28 "Stummgeschaltete Server": "Silenced servers",
29 "停止済みのサーバー": "Suspended servers",
30 "メディアを拒否しているサーバー": "Filtered media",
31 "サイレンス済みのサーバー": "Silenced servers",
32 "שרתים מושעים": "Suspended servers",
33 "מדיה מסוננת": "Filtered media",
34 "שרתים מוגבלים": "Silenced servers",
35 "Serveurs suspendus": "Suspended servers",
36 "Médias filtrés": "Filtered media",
37 "Serveurs limités": "Silenced servers",
# Page download with a 5 second timeout. The trailing comma makes this one
# argument of an enclosing call that is not visible here (presumably the
# HTML parser that produces `doc` - TODO confirm).
42 get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
# Every <h3> element is treated as a candidate section heading.
48 for header in doc.find_all("h3"):
49 header_text = header.text
# Normalize a known alternative heading to its canonical name.
50 if header_text in translations:
51 header_text = translations[header_text]
# Headings that do not name a bucket are ignored.
52 if header_text in blocks:
53 # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
# Uses the first <table> that follows the heading; [1:] skips its first row.
54 for line in header.find_all_next("table")[0].find_all("tr")[1:]:
55 blocks[header_text].append(
57 "domain": line.find("span").text,
# Drops the first 9 characters of the span's title attribute (presumably a
# fixed label in front of the hash - TODO confirm).
58 "hash": line.find("span")["title"][9:],
# The reason is read from the second cell of the row.
59 "reason": line.find_all("td")[1].text.strip(),
# Result keys: "Limited servers" and "Silenced servers" are concatenated into
# the single "followers_only" list.
63 "reject": blocks["Suspended servers"],
64 "media_removal": blocks["Filtered media"],
65 "followers_only": blocks["Limited servers"]
66 + blocks["Silenced servers"],
# Scrape the "/friendica" HTML page of a Friendica instance and collect the
# rows of its published blocklist table.
# NOTE(review): indentation and several lines (the assignment of `doc`, the
# container the entries are appended to, the return value) are missing from
# this listing - confirm against the complete file.
69 def get_friendica_blocks(domain: str) -> dict:
# Page download with a 5 second timeout; the trailing comma makes this one
# argument of an enclosing call that is not visible here.
74 get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
# The blocklist is looked up by the element id "about_blocklist".
# NOTE(review): no check for a missing element is visible before .find("table").
80 blocklist = doc.find(id="about_blocklist")
# [1:] skips the first row of the table.
81 for line in blocklist.find("table").find_all("tr")[1:]:
# First cell: blocked domain; second cell: reason. Both are whitespace-stripped.
84 "domain": line.find_all("td")[0].text.strip(),
85 "reason": line.find_all("td")[1].text.strip()
# Collect the "suspended" and "blocked" instance lists of a Misskey server by
# POSTing to its /api/federation/instances endpoint page by page.
# NOTE(review): indentation and several lines (initialisation of `blocks`,
# `counter` and `step`, the loop headers and exit conditions, the return
# statement opener) are missing from this listing - confirm against the
# complete file.
93 def get_pisskey_blocks(domain: str) -> dict:
103 # iterating through all "suspended" (follow-only in its terminology) instances page-by-page, since the API doesn't support sending them all at once
# Request without an "offset" field.
106 doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"suspended":True,"limit":step}), headers=headers, timeout=5).json()
# Request with an "offset" field.
# NOTE(review): the offset is counter-1 while counter advances by `step`; if
# counter starts at 0 this re-requests the last entry of the previous page -
# confirm whether the overlap is intended.
109 doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"suspended":True,"limit":step,"offset":counter-1}), headers=headers, timeout=5).json()
# Only entries flagged isSuspended are recorded.
113 if instance["isSuspended"]:
114 blocks["suspended"].append(
116 "domain": instance["host"],
117 # no reason field, nothing
# Advance to the next page.
121 counter = counter + step
127 # same procedure again for "blocked" instances (aka full suspend)
130 doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"blocked":True,"limit":step}), headers=headers, timeout=5).json()
133 doc = post(f"https://{domain}/api/federation/instances", data=dumps({"sort":"+caughtAt","host":None,"blocked":True,"limit":step,"offset":counter-1}), headers=headers, timeout=5).json()
# Only entries flagged isBlocked are recorded.
136 if instance["isBlocked"]:
137 blocks["blocked"].append(
139 "domain": instance["host"],
143 counter = counter + step
# Result keys: "blocked" entries are reported as "reject", "suspended" entries
# as "followers_only".
149 "reject": blocks["blocked"],
150 "followers_only": blocks["suspended"]
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of *domain*.

    The domain is encoded as UTF-8 before hashing; the result is the
    64-character lower-case hex string produced by ``hexdigest()``.
    """
    hasher = sha256()
    hasher.update(domain.encode("utf-8"))
    return hasher.hexdigest()
# Determine which server software `domain` runs by querying its nodeinfo
# documents.
# NOTE(review): indentation and several lines (including the statements
# executed inside the software-name branches and everything after the final
# request) are missing from this listing - confirm against the complete file.
160 def get_type(domain: str) -> str:
# Try the nodeinfo locations in turn, moving on to the next after a 404.
162 res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
163 if res.status_code == 404:
164 res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
165 if res.status_code == 404:
166 res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
# A successful response whose content type mentions HTML is replaced by a
# request to /nodeinfo/2.1.
167 if res.ok and "text/html" in res.headers["content-type"]:
168 res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)
# Several software names are grouped together; what each group resolves to is
# on lines not visible here.
# NOTE(review): res.json() is re-evaluated in every comparison.
170 if res.json()["software"]["name"] in ["akkoma", "rebased"]:
172 elif res.json()["software"]["name"] in ["hometown", "ecko"]:
174 elif res.json()["software"]["name"] in ["calckey", "groundpolis", "foundkey", "cherrypick"]:
# Returns the software name exactly as reported by nodeinfo.
177 return res.json()["software"]["name"]
# When the nodeinfo lookup ended in a 404, /api/v1/instance is queried instead.
178 elif res.status_code == 404:
179 res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
def tidyup(domain: str) -> str:
    """Normalise a raw blocklist entry to a bare, lower-case host name.

    Blocklist entries are written by hand and come in many shapes; this
    strips everything that is not part of the host name:

    * upper/mixed case is folded to lower case,
    * one trailing slash is removed,
    * a trailing ``:port`` is removed,
    * a leading ``http:``/``https:`` scheme is removed, with or without
      the slashes that normally follow it,
    * a leading ``@`` is removed,
    * for entries that target a single account (``user@host``) only the
      part after the last ``@`` is kept.

    Wildcard characters such as ``*`` are left untouched.

    Args:
        domain: The entry exactly as published by the blocking instance.

    Returns:
        The cleaned-up host name.
    """
    # Blocklists contain entries in variable case.
    domain = domain.lower()
    # Remove the trailing slash before looking for a port: the port pattern
    # is anchored to the end of the string and would otherwise miss
    # "host:port/".
    domain = re.sub(r"/$", "", domain)
    # Some entries carry an explicit port.
    domain = re.sub(r":\d+$", "", domain)
    # Some entries carry the scheme, sometimes even without the slashes.
    domain = re.sub(r"^https?:/*", "", domain)
    # Some entries start with an "@".
    domain = re.sub(r"^@", "", domain)
    # Some entries name an individual user; keep only the host part.
    domain = re.sub(r".+@", "", domain)
    return domain
# Script body: for every stored instance of a supported software type, fetch
# the blocklist it publishes and record it in the SQLite file "blocks.db".
# NOTE(review): indentation and several lines (the creation of cursor `c`,
# the call openers in front of the SQL strings - presumably c.execute( -, the
# `try:` lines and any commit) are missing from this listing; confirm the
# notes below against the complete file.
200 conn = sqlite3.connect("blocks.db")
# Only software types handled by one of the branches below are selected.
204 "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
207 for blocker, software in c.fetchall():
208 blocker = tidyup(blocker)
209 if software == "pleroma":
# The federation policy is read from metadata.federation of the nodeinfo
# 2.1 document.
214 f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
215 ).json()["metadata"]["federation"]
216 if "mrf_simple" in federation:
# The quarantined instances are merged in as one more block level next to
# the mrf_simple ones.
# NOTE(review): "quarantined_instances" is indexed without a presence check,
# unlike "quarantined_instances_info" further down - confirm it is always
# present.
217 for block_level, blocks in (
218 {**federation["mrf_simple"],
219 **{"quarantined_instances": federation["quarantined_instances"]}}
221 for blocked in blocks:
222 blocked = tidyup(blocked)
# Entries with more than one "*" are treated as obscured domains.
225 if blocked.count("*") > 1:
226 # some servers in this branch also started obscuring domains without hash
# Guess the real domain: each "*" becomes the single-character LIKE wildcard
# "_" and the first match by rowid is taken.
228 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),)
230 searchres = c.fetchone()
231 if searchres != None:
232 blocked = searchres[0]
# Register the blocked domain as an instance when it is not known yet.
235 "select domain from instances where domain = ?", (blocked,)
237 if c.fetchone() == None:
239 "insert into instances select ?, ?, ?",
240 (blocked, get_hash(blocked), get_type(blocked)),
242 timestamp = int(time())
# Look for an existing row for this blocker/blocked/level combination.
244 "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
245 (blocker, blocked, block_level),
# No row yet: insert one with an empty reason and the same timestamp in the
# last two columns.
247 if c.fetchone() == None:
249 "insert into blocks select ?, ?, '', ?, ?, ?",
250 (blocker, blocked, block_level, timestamp, timestamp),
# Refreshes last_seen (NOTE(review): the branch structure between this update
# and the insert above is not visible here).
254 "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
255 (timestamp, blocker, blocked, block_level)
# Second pass: the block reasons are published separately.
259 if "mrf_simple_info" in federation:
260 for block_level, info in (
261 {**federation["mrf_simple_info"],
262 **(federation["quarantined_instances_info"]
263 if "quarantined_instances_info" in federation
266 for blocked, reason in info.items():
267 blocked = tidyup(blocked)
270 if blocked.count("*") > 1:
271 # same domain guess as above, but for reasons field
273 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),)
275 searchres = c.fetchone()
276 if searchres != None:
277 blocked = searchres[0]
# The `reason = ''` condition means a reason is only written while the stored
# one is still empty.
279 "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
280 (reason["reason"], blocker, blocked, block_level),
# Any exception is printed together with the blocker being processed.
283 except Exception as e:
284 print("error:", e, blocker)
# Mastodon branch: read the blocklist from the JSON API or from
# get_mastodon_blocks(), then store it.
# NOTE(review): indentation and several lines (the `try:` lines, the call
# openers in front of the SQL strings, the loop over the API response and the
# fallback condition) are missing from this listing - confirm against the
# complete file.
285 elif software == "mastodon":
288 # JSON endpoint available on newer Mastodon versions
# Part of the result mapping filled from the API response; the appends below
# also use the keys "reject", "media_removal" and "report_removal".
293 "followers_only": [],
297 f"https://{blocker}/api/v1/instance/domain_blocks", headers=headers, timeout=5
# Same key order (domain, hash, reason) as the entries built by
# get_mastodon_blocks(); the unpacking further down depends on that order.
300 entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
# Sort each entry into a list according to its severity.
301 if block['severity'] == 'suspend':
302 json['reject'].append(entry)
303 elif block['severity'] == 'silence':
304 json['followers_only'].append(entry)
305 elif block['severity'] == 'reject_media':
306 json['media_removal'].append(entry)
307 elif block['severity'] == 'reject_reports':
308 json['report_removal'].append(entry)
# Blocklist obtained by scraping the HTML page instead.
310 json = get_mastodon_blocks(blocker)
312 for block_level, blocks in json.items():
313 for instance in blocks:
# Positional unpacking of the dict values: relies on the insertion order
# domain, hash, reason.
314 blocked, blocked_hash, reason = instance.values()
315 blocked = tidyup(blocked)
# At most one "*": the domain is treated as not obscured and is registered
# when its hash is not known yet.
316 if blocked.count("*") <= 1:
318 "select hash from instances where hash = ?", (blocked_hash,)
320 if c.fetchone() == None:
322 "insert into instances select ?, ?, ?",
323 (blocked, get_hash(blocked), get_type(blocked)),
326 # Doing the hash search for instance names as well to tidy up DB
328 "select domain from instances where hash = ?", (blocked_hash,)
330 searchres = c.fetchone()
331 if searchres != None:
332 blocked = searchres[0]
334 timestamp = int(time())
# When the name still contains more than one "*", the hash is used in place
# of the domain in the queries below.
336 "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
337 (blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
339 if c.fetchone() == None:
341 "insert into blocks select ?, ?, ?, ?, ?, ?",
344 blocked if blocked.count("*") <= 1 else blocked_hash,
# Refreshes last_seen (NOTE(review): the branch structure between this update
# and the insert above is not visible here).
353 "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
354 (timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
# The `reason = ''` condition means a reason is only written while the stored
# one is still empty.
358 "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
359 (reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
# Any exception is printed together with the blocker being processed.
362 except Exception as e:
363 print("error:", e, blocker)
# Friendica / Misskey branch: both fetchers return a mapping of block level
# to a list of entries, so they share the storage code.
# NOTE(review): indentation and several lines (the `try:` line, the call
# openers in front of the SQL strings, the parameters of the insert) are
# missing from this listing - confirm against the complete file.
364 elif software == "friendica" or software == "misskey":
367 if software == "friendica":
368 json = get_friendica_blocks(blocker)
369 elif software == "misskey":
370 json = get_pisskey_blocks(blocker)
371 for block_level, blocks in json.items():
372 for instance in blocks:
# Positional unpacking of the dict values: every entry must hold exactly two
# values, in the order domain, reason.
373 blocked, reason = instance.values()
374 blocked = tidyup(blocked)
# Any "*" marks the domain as obscured here.
376 if blocked.count("*") > 0:
377 # Some friendica servers also obscure domains without hash
# Guess the real domain: each "*" becomes the single-character LIKE wildcard
# "_" and the first match by rowid is taken.
379 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),)
381 searchres = c.fetchone()
382 if searchres != None:
383 blocked = searchres[0]
385 if blocked.count("?") > 0:
386 # Some obscure them with question marks, not sure if that's dependent on version or not
388 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("?", "_"),)
390 searchres = c.fetchone()
391 if searchres != None:
392 blocked = searchres[0]
# Register the blocked domain as an instance when it is not known yet.
395 "select domain from instances where domain = ?", (blocked,)
397 if c.fetchone() == None:
399 "insert into instances select ?, ?, ?",
400 (blocked, get_hash(blocked), get_type(blocked)),
403 timestamp = int(time())
# NOTE(review): this existence check filters on `reason`, while the updates
# below filter on `block_level` - confirm this is intended.
405 "select * from blocks where blocker = ? and blocked = ? and reason = ?",
406 (blocker, blocked, reason),
408 if c.fetchone() == None:
410 "insert into blocks select ?, ?, ?, ?, ?, ?",
# Refreshes last_seen (NOTE(review): the branch structure between this update
# and the insert above is not visible here).
422 "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
423 (timestamp, blocker, blocked, block_level),
# The `reason = ''` condition means a reason is only written while the stored
# one is still empty.
427 "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
428 (reason, blocker, blocked, block_level),
# Any exception is printed together with the blocker being processed.
431 except Exception as e:
432 print("error:", e, blocker)
# GoToSocial branch: the suspended peers are read from the instance peers API
# and every one of them is stored with the block level "reject".
# NOTE(review): indentation and several lines (the `try:` line, the request
# call that produces `federation`, the call openers in front of the SQL
# strings) are missing from this listing - confirm against the complete file.
433 elif software == "gotosocial":
438 f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
440 for peer in federation:
# The domain is only lower-cased here; tidyup() is not applied in this branch.
441 blocked = peer["domain"].lower()
443 if blocked.count("*") > 0:
444 # GTS does not have hashes for obscured domains, so we have to guess it
# Each "*" becomes the single-character LIKE wildcard "_" and the first match
# by rowid is taken.
446 "select domain from instances where domain like ? order by rowid limit 1", (blocked.replace("*", "_"),)
448 searchres = c.fetchone()
449 if searchres != None:
450 blocked = searchres[0]
# Register the blocked domain as an instance when it is not known yet.
453 "select domain from instances where domain = ?", (blocked,)
455 if c.fetchone() == None:
457 "insert into instances select ?, ?, ?",
458 (blocked, get_hash(blocked), get_type(blocked)),
# Look for an existing "reject" row for this blocker/blocked pair; its result
# is consumed by the fetchone() two statements below.
461 "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
462 (blocker, blocked, "reject"),
464 timestamp = int(time())
465 if c.fetchone() == None:
# New row with an empty reason and the same timestamp in the last two columns.
467 "insert into blocks select ?, ?, ?, ?, ?, ?",
468 (blocker, blocked, "", "reject", timestamp, timestamp),
# Refreshes last_seen (NOTE(review): the branch structure between this update
# and the insert above is not visible here).
472 "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
473 (timestamp, blocker, blocked, "reject"),
# A reason is stored only when the peer entry carries a "public_comment" and
# the stored reason is still empty.
475 if "public_comment" in peer:
476 reason = peer["public_comment"]
478 "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
479 (reason, blocker, blocked, "reject"),
# Any exception is printed together with the blocker being processed.
482 except Exception as e:
483 print("error:", e, blocker)