]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
ALWAYS write SQL keywords in uppercase!
[fba.git] / fetch_blocks.py
1 from reqto import get
2 from reqto import post
3 from hashlib import sha256
4 import sqlite3
5 from bs4 import BeautifulSoup
6 from json import dumps
7 from json import loads
8 import re
9 from time import time
10 import itertools
11
# Load the runtime settings. JSON documents are UTF-8 encoded, so the
# encoding is stated explicitly instead of relying on the platform default.
with open("config.json", encoding="utf-8") as f:
    config = loads(f.read())

# Default HTTP headers sent with every outgoing request of this script.
headers = {
    "user-agent": config["useragent"],
}
18
def send_bot_post(instance: str, blocks: list):
    """Announce newly recorded blocks of *instance* through the bot account.

    Args:
        instance: Domain of the instance that issued the blocks.
        blocks: List of dicts with the keys ``"blocked"`` (domain) and
            ``"reason"`` (text, ``None`` or ``''``). The list and its
            entries are not modified.

    Returns:
        Always ``True``. The HTTP response is not inspected; exceptions
        raised by the request itself propagate to the caller.
    """
    max_entries = 20
    max_reason_length = 420

    truncated = len(blocks) > max_entries
    lines = []
    # Slice ends are exclusive, so [:20] keeps exactly the 20 entries that
    # the truncation notice below promises.
    for block in blocks[:max_entries]:
        reason = block["reason"]
        if reason is None or reason == "":
            lines.append(block["blocked"] + " with unspecified reason\n")
            continue
        if len(reason) > max_reason_length:
            reason = reason[:max_reason_length] + "[…]"
        # A zero-width space after "@" keeps the text from being turned
        # into a mention of the named account.
        lines.append(
            block["blocked"] + ' for "' + reason.replace("@", "@\u200b") + '"\n'
        )

    message = instance + " has blocked the following instances:\n\n" + "".join(lines)
    if truncated:
        message = message + "(the list has been truncated to the first 20 entries)"

    botheaders = {**headers, "Authorization": "Bearer " + config["bot_token"]}
    post(
        f"{config['bot_instance']}/api/v1/statuses",
        data={
            "status": message,
            "visibility": config["bot_visibility"],
            "content_type": "text/plain",
        },
        headers=botheaders,
        timeout=10,
    )
    return True
40
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the block tables from the ``/about/more`` page of *domain*.

    Each ``<h3>`` heading is matched (directly or through a translation
    table) against the known section names; the rows of the first table
    following a matching heading are collected.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``"reject"``, ``"media_removal"`` and
        ``"followers_only"``, each holding a list of
        ``{"domain", "hash", "reason"}`` dicts, or ``{}`` when the page
        could not be fetched or parsed.

    Raises:
        TypeError, AttributeError, IndexError, KeyError: If a table row
        does not have the expected ``<span title=...>`` / two-cell layout.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Localized (and older English) headings mapped to the canonical names.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "שרתים מושעים": "Suspended servers",
        "מדיה מסוננת": "Filtered media",
        "שרתים מוגבלים": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Network or parser failure: report "nothing found". Catching
        # Exception (not everything) keeps Ctrl-C and SystemExit working.
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue

        # Search everything after the heading rather than only its siblings,
        # to account for instances that e.g. hide the lists in a dropdown.
        table = header.find_next("table")
        if table is None:
            # Heading without any table after it: nothing to collect.
            continue

        # The first row is skipped (treated as the table header).
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            blocks[header_text].append(
                {
                    "domain": span.text,
                    # Drops the first 9 characters of the title attribute;
                    # presumably a "SHA-256: " style prefix — TODO confirm.
                    "hash": span["title"][9:],
                    "reason": line.find_all("td")[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "followers_only": blocks["Limited servers"] + blocks["Silenced servers"],
    }
94
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the block list from the ``/friendica`` page of *domain*.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{"reject": [...]}`` where each entry is a
        ``{"domain", "reason"}`` dict, or ``{}`` when the page could not be
        fetched or parsed. The list is empty when the page has no
        ``about_blocklist`` element or that element contains no table.
    """
    blocks = []

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Network or parser failure: report "nothing found".
        return {}

    blocklist = doc.find(id="about_blocklist")
    table = blocklist.find("table") if blocklist is not None else None
    if table is not None:
        # The first row is skipped (treated as the table header).
        for line in table.find_all("tr")[1:]:
            cells = line.find_all("td")
            if len(cells) < 2:
                # Not a domain/reason row; ignore it instead of failing.
                continue
            blocks.append(
                {
                    "domain": cells[0].text.strip(),
                    "reason": cells[1].text.strip(),
                }
            )

    return {
        "reject": blocks
    }
118
def _fetch_misskey_instances(domain: str, filter_key: str, flag_key: str) -> list:
    """Page through ``/api/federation/instances`` of *domain*.

    Args:
        domain: Host name of the instance to query.
        filter_key: Request field set to ``True`` to select the listing
            (``"suspended"`` or ``"blocked"``).
        flag_key: Field of each returned entry that must be truthy for the
            entry to be kept (``"isSuspended"`` or ``"isBlocked"``).

    Returns:
        A list of ``{"domain", "reason"}`` dicts; ``reason`` is always
        ``""`` because the API response is not read for one. Whatever was
        collected before a failure is still returned.
    """
    found = []
    seen = set()
    step = 99
    offset = 0

    # The endpoint is queried page by page until it has nothing more.
    while True:
        query = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
        if offset > 0:
            query["offset"] = offset
        try:
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(query),
                headers=headers,
                timeout=5,
            ).json()
            if not isinstance(page, list) or not page:
                # Empty page (end of the listing) or an unexpected payload.
                break

            fresh = 0
            for instance in page:
                host = instance["host"]
                if host in seen:
                    continue
                seen.add(host)
                fresh += 1
                # Re-check the flag in case the server ignored the filter.
                if instance[flag_key]:
                    found.append(
                        {
                            "domain": host,
                            "reason": ""
                        }
                    )
        except Exception:
            # Best effort: keep what has been collected so far.
            break

        if fresh == 0:
            # Nothing new on this page (e.g. the offset was ignored); stop
            # rather than request the same data forever.
            break
        # Continue right after the last entry received, so that no entry is
        # fetched twice or skipped even if the server returned fewer entries
        # than requested.
        offset += len(page)

    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Collect the federation restrictions published by a Misskey instance.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with ``"reject"`` (entries flagged ``isBlocked``) and
        ``"followers_only"`` (entries flagged ``isSuspended``), each a list
        of ``{"domain", "reason"}`` dicts. Lists are empty or partial when
        requests fail.
    """
    # Same request order as before: "suspended" first, then "blocked".
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")

    return {
        "reject": blocked,
        "followers_only": suspended
    }
181
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of *domain* encoded as UTF-8."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
184
185
def get_type(domain: str) -> str:
    """Determine which server software *domain* runs.

    Several nodeinfo URLs are tried in turn; the software name found there
    is returned, with known forks mapped to the name of the software they
    are handled as. If nodeinfo answers 404, a successful response from
    ``/api/v1/instance`` is taken to mean Mastodon.

    Args:
        domain: Host name of the instance to probe.

    Returns:
        The software name, or ``None`` when it could not be determined
        (request failure, unexpected payload, or no usable endpoint).
    """
    # Fork name -> name of the software whose handling applies to it.
    aliases = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }

    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        # An HTML answer is not nodeinfo; try one more URL. A response
        # without a content-type header is treated as "not HTML".
        if res.ok and "text/html" in res.headers.get("content-type", ""):
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # Parse the body once instead of once per comparison.
            software = res.json()["software"]["name"]
            return aliases.get(software, software)

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
            if res.ok:
                return "mastodon"
    except Exception:
        # Any request or payload problem means "unknown"; KeyboardInterrupt
        # and SystemExit are deliberately not swallowed.
        return None

    return None
210
def tidyup(domain: str) -> str:
    """Normalize a domain entry taken from a block list.

    Block lists contain entries in mixed case, with a scheme, a port, a
    trailing slash, a leading ``@`` or even a ``user@`` prefix. All of
    these are removed so that only the bare, lower-case host name remains.

    Args:
        domain: Raw entry as published by the blocking instance.

    Returns:
        The cleaned-up host name (possibly an empty string).
    """
    # Entries appear in variable case and sometimes with stray whitespace.
    domain = domain.strip().lower()
    # Scheme, sometimes written without the slashes ("https:example.com").
    domain = re.sub(r"^https?:/*", "", domain)
    # Trailing slash; removed before the port so that the port pattern can
    # match at the end of entries like "https://example.com:8080/".
    domain = re.sub(r"/$", "", domain)
    # Port number.
    domain = re.sub(r":\d+$", "", domain)
    # Leading "@".
    domain = re.sub(r"^@", "", domain)
    # Entries naming an individual user ("user@example.com"): keep the host.
    domain = re.sub(r".+@", "", domain)
    return domain
225
def _guess_obscured_domain(cursor, blocked: str, wildcard: str) -> str:
    """Return a known domain matching an obscured *blocked* entry.

    Every *wildcard* character is turned into the single-character SQL LIKE
    wildcard and the oldest matching row of ``instances`` is used. When
    nothing matches, *blocked* is returned unchanged.
    """
    cursor.execute(
        "SELECT domain FROM  instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1",
        (blocked.replace(wildcard, "_"),),
    )
    row = cursor.fetchone()
    return blocked if row is None else row[0]


def _ensure_instance(cursor, blocked: str) -> None:
    """Add *blocked* to ``instances`` unless a row for that domain exists."""
    cursor.execute("SELECT domain FROM  instances WHERE domain = ?", (blocked,))
    if cursor.fetchone() is None:
        cursor.execute(
            "INSERT INTO instances SELECT ?, ?, ?",
            (blocked, get_hash(blocked), get_type(blocked)),
        )


def _record_block(cursor, blocker: str, blocked: str, reason, block_level: str) -> bool:
    """Insert a block row, or refresh ``last_seen`` of the existing one.

    Returns:
        ``True`` when a new row was inserted, ``False`` when the
        (blocker, blocked, block_level) combination was already stored.
    """
    timestamp = int(time())
    cursor.execute(
        "SELECT * FROM  blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
        (blocker, blocked, block_level),
    )
    if cursor.fetchone() is None:
        cursor.execute(
            "INSERT INTO blocks SELECT ?, ?, ?, ?, ?, ?",
            (blocker, blocked, reason, block_level, timestamp, timestamp),
        )
        return True

    cursor.execute(
        "UPDATE blocks set last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ?",
        (timestamp, blocker, blocked, block_level),
    )
    return False


def _fill_reason(cursor, blocker: str, blocked: str, reason, block_level: str) -> None:
    """Store *reason* for a block whose stored reason is still empty."""
    cursor.execute(
        "UPDATE blocks set reason = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason = ''",
        (reason, blocker, blocked, block_level),
    )


def _set_reported_reason(blockdict: list, blocked: str, reason) -> None:
    """Attach *reason* to the pending bot-report entries for *blocked*."""
    for entry in blockdict:
        if entry["blocked"] == blocked:
            entry["reason"] = reason


def _process_pleroma(cursor, connection, blocker: str, blockdict: list) -> None:
    """Record the blocks a Pleroma-type instance publishes via nodeinfo.

    New ``reject`` blocks are appended to *blockdict* for the bot report.
    """
    # Blocks
    federation = get(
        f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
    ).json()["metadata"]["federation"]

    if "mrf_simple" in federation:
        levels = {
            **federation["mrf_simple"],
            # Not every instance publishes this key; treat it as optional,
            # like "quarantined_instances_info" below.
            "quarantined_instances": federation.get("quarantined_instances") or [],
        }
        for block_level, blocks in levels.items():
            for blocked in blocks:
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Domains may be obscured without any hash; guess them.
                    blocked = _guess_obscured_domain(cursor, blocked, "*")

                _ensure_instance(cursor, blocked)
                is_new = _record_block(cursor, blocker, blocked, "", block_level)
                if is_new and block_level == "reject":
                    blockdict.append({"blocked": blocked, "reason": None})
    connection.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        reasons = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }
        for block_level, info in reasons.items():
            for blocked, reason in info.items():
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Same domain guess as above, for the reasons listing.
                    blocked = _guess_obscured_domain(cursor, blocked, "*")
                _fill_reason(cursor, blocker, blocked, reason["reason"], block_level)
                _set_reported_reason(blockdict, blocked, reason["reason"])
    connection.commit()


def _fetch_mastodon_blocks(blocker: str) -> dict:
    """Return the blocks of a Mastodon instance, grouped by block level.

    The JSON endpoint is tried first; if anything about it fails, the
    result of scraping the about page is returned instead.
    """
    severity_levels = {
        "suspend": "reject",
        "silence": "followers_only",
        "reject_media": "media_removal",
        "reject_reports": "report_removal",
    }
    try:
        json = {
            "reject": [],
            "media_removal": [],
            "followers_only": [],
            "report_removal": []
        }

        # Some servers require a CSRF token to access the endpoint.
        meta = BeautifulSoup(
            get(f"https://{blocker}/about", headers=headers, timeout=5).text,
            "html.parser",
        )
        try:
            csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
            reqheaders = {**headers, "x-csrf-token": csrf}
        except (TypeError, KeyError):
            # No such <meta> tag, or the tag has no content attribute.
            reqheaders = headers

        blocks = get(
            f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
        ).json()
        for block in blocks:
            entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
            level = severity_levels.get(block['severity'])
            if level is not None:
                json[level].append(entry)
        return json
    except Exception:
        return get_mastodon_blocks(blocker)


def _process_mastodon(cursor, connection, blocker: str, blockdict: list) -> None:
    """Record the blocks published by a Mastodon instance.

    New ``reject`` blocks are appended to *blockdict* for the bot report.
    """
    for block_level, blocks in _fetch_mastodon_blocks(blocker).items():
        for instance in blocks:
            # Read the fields by name rather than relying on dict order.
            blocked = tidyup(instance["domain"])
            blocked_hash = instance["hash"]
            reason = instance["reason"]

            if blocked.count("*") <= 1:
                cursor.execute(
                    "SELECT hash FROM  instances WHERE hash = ?", (blocked_hash,)
                )
                if cursor.fetchone() is None:
                    cursor.execute(
                        "INSERT INTO instances SELECT ?, ?, ?",
                        (blocked, get_hash(blocked), get_type(blocked)),
                    )
            else:
                # Obscured name: look the real domain up by its hash.
                cursor.execute(
                    "SELECT domain FROM  instances WHERE hash = ?", (blocked_hash,)
                )
                searchres = cursor.fetchone()
                if searchres is not None:
                    blocked = searchres[0]

            # A still-obscured domain is stored under its hash instead.
            stored = blocked if blocked.count("*") <= 1 else blocked_hash

            is_new = _record_block(cursor, blocker, stored, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append({"blocked": blocked, "reason": reason})
            if reason != '':
                _fill_reason(cursor, blocker, stored, reason, block_level)
    connection.commit()


def _process_listing(cursor, connection, blocker: str, listing: dict, blockdict: list) -> None:
    """Record blocks given as ``{block_level: [{"domain", "reason"}, ...]}``.

    New ``reject`` blocks are appended to *blockdict* for the bot report.
    """
    for block_level, blocks in listing.items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            reason = instance["reason"]

            if blocked.count("*") > 0:
                # Some servers obscure domains without providing a hash.
                blocked = _guess_obscured_domain(cursor, blocked, "*")
            if blocked.count("?") > 0:
                # Others obscure them with question marks.
                blocked = _guess_obscured_domain(cursor, blocked, "?")

            _ensure_instance(cursor, blocked)

            # The existence check includes block_level, so a domain listed
            # under a second level gets its own row instead of being
            # silently dropped.
            is_new = _record_block(cursor, blocker, blocked, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append({"blocked": blocked, "reason": reason})
            if reason != '':
                _fill_reason(cursor, blocker, blocked, reason, block_level)
    connection.commit()


def _process_friendica(cursor, connection, blocker: str, blockdict: list) -> None:
    """Record the blocks published by a Friendica instance."""
    _process_listing(cursor, connection, blocker, get_friendica_blocks(blocker), blockdict)


def _process_misskey(cursor, connection, blocker: str, blockdict: list) -> None:
    """Record the blocks published by a Misskey instance."""
    _process_listing(cursor, connection, blocker, get_pisskey_blocks(blocker), blockdict)


def _process_gotosocial(cursor, connection, blocker: str, blockdict: list) -> None:
    """Record the suspended peers published by a GoToSocial instance.

    Every new block is appended to *blockdict* for the bot report.
    """
    # Blocks
    federation = get(
        f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
    ).json()
    for peer in federation:
        blocked = peer["domain"].lower()

        if blocked.count("*") > 0:
            # No hashes are provided for obscured domains, so guess them.
            blocked = _guess_obscured_domain(cursor, blocked, "*")

        _ensure_instance(cursor, blocked)
        if _record_block(cursor, blocker, blocked, "", "reject"):
            blockdict.append({"blocked": blocked, "reason": None})

        if "public_comment" in peer:
            reason = peer["public_comment"]
            _fill_reason(cursor, blocker, blocked, reason, "reject")
            _set_reported_reason(blockdict, blocked, reason)
    connection.commit()


# Handler for each software type selected by the query below.
_PROCESSORS = {
    "pleroma": _process_pleroma,
    "mastodon": _process_mastodon,
    "friendica": _process_friendica,
    "misskey": _process_misskey,
    "gotosocial": _process_gotosocial,
}

conn = sqlite3.connect("blocks.db")
c = conn.cursor()

try:
    c.execute(
        "SELECT domain, software FROM  instances WHERE software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
    )

    # fetchall() materializes the result first, so the cursor can be reused
    # for the statements executed while processing each instance.
    for blocker, software in c.fetchall():
        # New "reject" blocks of this blocker, reported by the bot below.
        blockdict = []
        blocker = tidyup(blocker)

        processor = _PROCESSORS.get(software)
        if processor is not None:
            print(blocker)
            try:
                processor(c, conn, blocker, blockdict)
            except Exception as e:
                # One unreachable or misbehaving instance must not stop the
                # run; whatever was collected so far is still reported.
                print("error:", e, blocker)

        if config["bot_enabled"] and len(blockdict) > 0:
            try:
                send_bot_post(blocker, blockdict)
            except Exception as e:
                # A failed announcement must not abort the remaining work.
                print("error:", e, blocker)
finally:
    # Release the database even when the run is interrupted.
    conn.close()