]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Handle CSRF tokens on masto endpoint
[fba.git] / fetch_blocks.py
1 from reqto import get
2 from reqto import post
3 from hashlib import sha256
4 import sqlite3
5 from bs4 import BeautifulSoup
6 from json import dumps
7 from json import loads
8 import re
9 from time import time
10
11 with open("config.json") as f:
12     config = loads(f.read())
13
14 headers = {
15     "user-agent": config["useragent"]
16 }
17
18
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the public block list from a Mastodon ``/about/more`` page.

    This is the HTML fallback used when the JSON endpoint is unavailable.
    Every ``<h3>`` heading whose (optionally translated) text names a known
    block category is followed to the next ``<table>`` in the document, and
    each row after the header row becomes one entry.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{}`` when the page cannot be fetched or parsed, otherwise a dict
        with the keys ``"reject"``, ``"media_removal"`` and
        ``"followers_only"``. Each value is a list of dicts with the keys
        ``"domain"``, ``"hash"`` and ``"reason"`` (in that order).
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Maps older and localized headings onto the canonical keys of `blocks`.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "שרתים מושעים": "Suspended servers",
        "מדיה מסוננת": "Filtered media",
        "שרתים מוגבלים": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or broken instance simply yields no
        # blocks. Unlike a bare ``except:`` this lets Ctrl-C through.
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue

        # find_next() searches the rest of the document rather than only the
        # siblings, to account for instances that e.g. hide the lists in a
        # dropdown menu. It returns None instead of raising when no table
        # follows the heading.
        table = header.find_next("table")
        if table is None:
            continue

        # The first row is skipped (treated as the table header).
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")
            if span is None or not span.has_attr("title") or len(cells) < 2:
                # Malformed row: skip it instead of aborting the whole scrape.
                continue
            blocks[header_text].append(
                {
                    "domain": span.text,
                    # The first 9 characters of the title are dropped;
                    # presumably a "SHA-256: " label - TODO confirm.
                    "hash": span["title"][9:],
                    "reason": cells[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "followers_only": blocks["Limited servers"]
        + blocks["Silenced servers"],
    }
72
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the public block list from a Friendica ``/friendica`` page.

    The list is read from the first ``<table>`` inside the element with
    ``id="about_blocklist"``; the first table row is skipped.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{}`` when the page cannot be fetched or parsed, otherwise
        ``{"reject": [...]}`` where every entry is a dict with the keys
        ``"domain"`` and ``"reason"`` (in that order). The list is empty
        when the page carries no block list.
    """
    blocks = []

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or broken instance yields no blocks.
        return {}

    blocklist = doc.find(id="about_blocklist")
    table = blocklist.find("table") if blocklist is not None else None
    if table is None:
        # No block list on the page: report "no blocks" rather than letting
        # an AttributeError escape to the caller.
        return {"reject": blocks}

    for line in table.find_all("tr")[1:]:
        cells = line.find_all("td")
        if len(cells) < 2:
            # Malformed row without a reason column.
            continue
        blocks.append(
            {
                "domain": cells[0].text.strip(),
                "reason": cells[1].text.strip(),
            }
        )

    return {
        "reject": blocks
    }
96
def _fetch_misskey_instances(domain: str, filter_key: str, flag_key: str, step: int = 99) -> list:
    """Page through ``/api/federation/instances`` for one filter.

    Args:
        domain: Host name of the instance to query.
        filter_key: Request field set to ``True`` (``"suspended"`` or
            ``"blocked"``).
        flag_key: Field that must be truthy on a returned entry for it to be
            kept (``"isSuspended"`` or ``"isBlocked"``).
        step: Page size sent as ``limit``.

    Returns:
        A list of ``{"domain": host, "reason": ""}`` dicts, one per host.
        Whatever was collected before a request or parse error is returned.
    """
    found = []
    seen = set()
    counter = 0

    while True:
        query = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
        if counter != 0:
            # NOTE(review): the offset is deliberately kept at counter-1 as in
            # the previous implementation, so consecutive pages overlap by
            # one entry; duplicates are filtered through `seen` below.
            query["offset"] = counter - 1

        try:
            doc = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(query),
                headers=headers,
                timeout=5,
            ).json()

            # An empty page ends the listing. Anything that is not a list
            # (e.g. an error object) cannot be paged through either.
            if not isinstance(doc, list) or not doc:
                break

            new_hosts = 0
            for instance in doc:
                # Re-check the flag just in case the server ignored the filter.
                if not instance[flag_key]:
                    continue
                host = instance["host"]
                if host in seen:
                    continue
                seen.add(host)
                new_hosts += 1
                # The endpoint provides no reason field.
                found.append({"domain": host, "reason": ""})
        except Exception:
            # Best effort: keep what has been collected so far.
            break

        if new_hosts == 0:
            # Nothing new on this page (only the overlap entry, or a server
            # that ignores the offset): stop instead of looping forever.
            break
        counter += step

    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Fetch the block lists of a Misskey-family instance via its API.

    The API cannot return all entries at once, so both lists are collected
    page by page.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with ``"reject"`` (entries flagged ``isBlocked``) and
        ``"followers_only"`` (entries flagged ``isSuspended``). Every entry
        is a dict with the keys ``"domain"`` and ``"reason"`` (in that
        order); the reason is always the empty string.
    """
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")

    return {
        "reject": blocked,
        "followers_only": suspended
    }
159
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of *domain* encoded as UTF-8."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
162
163
def get_type(domain: str) -> str:
    """Detect which server software *domain* runs.

    Several nodeinfo URLs are tried in turn. Forks are reported under the
    name of the software they derive from (see ``aliases`` below). When a
    nodeinfo lookup ends in a 404, a successful ``/api/v1/instance`` request
    is taken as evidence of Mastodon.

    Args:
        domain: Host name of the instance to probe.

    Returns:
        The software name, or ``None`` when it cannot be determined or any
        request or parse step fails.
    """
    # Fork name -> name of the software family it is stored under.
    aliases = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }

    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        # An HTML answer is not nodeinfo, so try one more URL. A missing
        # content-type header no longer discards the response.
        if res.ok and "text/html" in res.headers.get("content-type", ""):
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # Parse the body once instead of once per comparison.
            software = res.json()["software"]["name"]
            return aliases.get(software, software)
        elif res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
        if res.ok:
            return "mastodon"
    except Exception:
        # Best effort: network errors and malformed answers mean "unknown".
        return None

    return None
188
def tidyup(domain: str) -> str:
    """Normalize a block-list entry to a bare, lower-case host name.

    Block lists contain entries in mixed case, with a URL scheme (sometimes
    without slashes), a port, a trailing slash, a leading ``@`` or a whole
    ``user@host`` handle. All of that is stripped.

    Args:
        domain: Raw entry as published by the blocking instance.

    Returns:
        The cleaned-up host name (possibly an empty string).
    """
    # Entries come in variable case.
    domain = domain.lower()
    # Drop the scheme, which is sometimes written without slashes.
    domain = re.sub(r"^https?:/*", "", domain)
    # Drop one trailing slash.
    domain = re.sub(r"/$", "", domain)
    # Drop the port. This runs after the trailing slash is gone, otherwise
    # "host:8443/" would keep its port.
    domain = re.sub(r":\d+$", "", domain)
    # Drop a leading "@".
    domain = re.sub(r"^@", "", domain)
    # Entries blocking an individual user: keep only the host part.
    domain = re.sub(r".+@", "", domain)
    return domain
203
def _guess_obscured_domain(cursor, blocked: str, wildcard: str = "*") -> str:
    """Guess the real domain behind an obscured block-list entry.

    Every *wildcard* character is replaced with SQL ``_`` (matches exactly
    one character) and the result is matched against ``instances.domain``
    with ``LIKE``. The match with the lowest rowid wins.

    Returns:
        The matching known domain, or *blocked* unchanged if none matches.
    """
    cursor.execute(
        "select domain from instances where domain like ? order by rowid limit 1",
        (blocked.replace(wildcard, "_"),),
    )
    row = cursor.fetchone()
    return blocked if row is None else row[0]


def _ensure_instance(cursor, domain: str) -> None:
    """Insert *domain* into ``instances`` unless a row for it already exists."""
    cursor.execute("select domain from instances where domain = ?", (domain,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (domain, get_hash(domain), get_type(domain)),
        )


def _record_block(cursor, blocker: str, blocked: str, block_level: str, reason="") -> None:
    """Insert a block row, or refresh ``last_seen`` if it is already known.

    A block is identified by (blocker, blocked, block_level). A new row gets
    the current time in both trailing timestamp columns.
    """
    timestamp = int(time())
    cursor.execute(
        "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
        (blocker, blocked, block_level),
    )
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into blocks select ?, ?, ?, ?, ?, ?",
            (blocker, blocked, reason, block_level, timestamp, timestamp),
        )
    else:
        cursor.execute(
            "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
            (timestamp, blocker, blocked, block_level),
        )


def _fill_reason(cursor, blocker: str, blocked: str, block_level: str, reason) -> None:
    """Store *reason* for a block, but only where no reason is stored yet."""
    cursor.execute(
        "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
        (reason, blocker, blocked, block_level),
    )


conn = sqlite3.connect("blocks.db")
c = conn.cursor()

try:
    c.execute(
        "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
    )

    # fetchall() materializes the rows, so the cursor can be reused below.
    for blocker, software in c.fetchall():
        blocker = tidyup(blocker)
        if software == "pleroma":
            print(blocker)
            try:
                # Blocks
                federation = get(
                    f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
                ).json()["metadata"]["federation"]
                if "mrf_simple" in federation:
                    # Quarantined instances are handled as one more block
                    # level. Not every server publishes that key, so a
                    # missing one is treated as an empty list.
                    levels = {
                        **federation["mrf_simple"],
                        "quarantined_instances": federation.get("quarantined_instances") or [],
                    }
                    for block_level, blocks in levels.items():
                        for blocked in blocks:
                            blocked = tidyup(blocked)
                            if blocked == "":
                                continue
                            if blocked.count("*") > 1:
                                # Domains may be obscured without a hash.
                                blocked = _guess_obscured_domain(c, blocked)
                            _ensure_instance(c, blocked)
                            _record_block(c, blocker, blocked, block_level)
                # Commit the blocks even if processing the reasons fails.
                conn.commit()

                # Reasons
                if "mrf_simple_info" in federation:
                    infos = {
                        **federation["mrf_simple_info"],
                        **federation.get("quarantined_instances_info", {}),
                    }
                    for block_level, info in infos.items():
                        for blocked, reason in info.items():
                            blocked = tidyup(blocked)
                            if blocked == "":
                                continue
                            if blocked.count("*") > 1:
                                # Same domain guess as above, so the reason
                                # lands on the row recorded for the block.
                                blocked = _guess_obscured_domain(c, blocked)
                            _fill_reason(c, blocker, blocked, block_level, reason["reason"])
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
        elif software == "mastodon":
            print(blocker)
            try:
                # JSON endpoint of newer Mastodon versions.
                try:
                    json = {
                        "reject": [],
                        "media_removal": [],
                        "followers_only": [],
                        "report_removal": []
                    }
                    severities = {
                        "suspend": "reject",
                        "silence": "followers_only",
                        "reject_media": "media_removal",
                        "reject_reports": "report_removal",
                    }

                    # At least one server requires a CSRF token to access the
                    # endpoint; it is read from the /about page if present.
                    meta = BeautifulSoup(
                        get(f"https://{blocker}/about", headers=headers, timeout=5).text,
                        "html.parser",
                    )
                    try:
                        csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
                        reqheaders = {**headers, **{"x-csrf-token": csrf}}
                    except (TypeError, KeyError):
                        # No such meta tag, or a tag without content.
                        reqheaders = headers

                    blocks = get(
                        f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
                    ).json()
                    for block in blocks:
                        entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
                        level = severities.get(block['severity'])
                        if level is not None:
                            json[level].append(entry)
                except Exception:
                    # Endpoint missing or unusable: scrape the HTML page.
                    json = get_mastodon_blocks(blocker)

                for block_level, blocks in json.items():
                    for instance in blocks:
                        # Explicit keys instead of relying on dict ordering.
                        blocked = tidyup(instance["domain"])
                        blocked_hash = instance["hash"]
                        reason = instance["reason"]

                        if blocked.count("*") <= 1:
                            c.execute(
                                "select hash from instances where hash = ?", (blocked_hash,)
                            )
                            if c.fetchone() is None:
                                c.execute(
                                    "insert into instances select ?, ?, ?",
                                    (blocked, get_hash(blocked), get_type(blocked)),
                                )
                        else:
                            # Obscured name: resolve it through the hash so
                            # the database holds the real domain if known.
                            c.execute(
                                "select domain from instances where hash = ?", (blocked_hash,)
                            )
                            searchres = c.fetchone()
                            if searchres is not None:
                                blocked = searchres[0]

                        # An obscured name that could not be resolved is
                        # stored under its hash instead.
                        block_key = blocked if blocked.count("*") <= 1 else blocked_hash

                        _record_block(c, blocker, block_key, block_level, reason)
                        if reason != '':
                            _fill_reason(c, blocker, block_key, block_level, reason)
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
        elif software == "friendica" or software == "misskey":
            print(blocker)
            try:
                if software == "friendica":
                    json = get_friendica_blocks(blocker)
                else:
                    json = get_pisskey_blocks(blocker)
                for block_level, blocks in json.items():
                    for instance in blocks:
                        blocked = tidyup(instance["domain"])
                        reason = instance["reason"]

                        if blocked.count("*") > 0:
                            # Some Friendica servers also obscure domains
                            # without providing a hash.
                            blocked = _guess_obscured_domain(c, blocked)

                        if blocked.count("?") > 0:
                            # Some obscure them with question marks instead.
                            blocked = _guess_obscured_domain(c, blocked, "?")

                        _ensure_instance(c, blocked)

                        # The existence check uses block_level (it used to
                        # compare the reason), matching the key of the
                        # update and of every other software branch.
                        _record_block(c, blocker, blocked, block_level, reason)
                        if reason != '':
                            _fill_reason(c, blocker, blocked, block_level, reason)
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
        elif software == "gotosocial":
            print(blocker)
            try:
                # Blocks
                federation = get(
                    f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
                ).json()
                for peer in federation:
                    blocked = peer["domain"].lower()

                    if blocked.count("*") > 0:
                        # GoToSocial has no hashes for obscured domains, so
                        # the real domain has to be guessed.
                        blocked = _guess_obscured_domain(c, blocked)

                    _ensure_instance(c, blocked)
                    _record_block(c, blocker, blocked, "reject", "")
                    if "public_comment" in peer:
                        _fill_reason(c, blocker, blocked, "reject", peer["public_comment"])
                conn.commit()
            except Exception as e:
                print("error:", e, blocker)
finally:
    # Close the database even when an error escapes the loop.
    conn.close()