]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Report findings to a bot
[fba.git] / fetch_blocks.py
1 from reqto import get
2 from reqto import post
3 from hashlib import sha256
4 import sqlite3
5 from bs4 import BeautifulSoup
6 from json import dumps
7 from json import loads
8 import re
9 from time import time
10 import itertools
11
# Load the runtime settings (user agent, bot options, ...) from config.json
# in the current working directory.
with open("config.json") as config_file:
    config = loads(config_file.read())

# Default HTTP headers sent with every outgoing request.
headers = {"user-agent": config["useragent"]}
18
def send_bot_post(instance: str, blocks: list, limit: int = 20):
    """Post a summary of newly recorded blocks through the configured bot account.

    Args:
        instance: Domain of the instance that issued the blocks.
        blocks: List of dicts, each with a "blocked" domain and a "reason"
            (the reason may be None or an empty string).
        limit: Maximum number of entries listed in the post; longer lists
            are cut off and a truncation notice is appended.

    Returns:
        True once the request has been sent and its JSON response printed.
    """
    truncated = len(blocks) > limit
    lines = []
    # Slice with [:limit] so that exactly `limit` entries are kept; the
    # previous [0:19] slice silently dropped the 20th entry.
    for block in blocks[:limit]:
        if block["reason"] is None or block["reason"] == '':
            lines.append(block["blocked"] + " with unspecified reason\n")
        else:
            lines.append(block["blocked"] + ' for "' + block["reason"] + '"\n')

    message = instance + " has blocked the following instances:\n\n" + "".join(lines)
    if truncated:
        message = message + f"(the list has been truncated to the first {limit} entries)"

    botheaders = {**headers, "Authorization": "Bearer " + config["bot_token"]}
    req = post(
        f"{config['bot_instance']}/api/v1/statuses",
        data={
            "status": message,
            "visibility": config['bot_visibility'],
            "content_type": "text/plain",
        },
        headers=botheaders,
        timeout=10,
    ).json()
    print(req)
    return True
39
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the published block list from a Mastodon instance's /about/more page.

    The page is searched for <h3> headings naming a block category (in one of
    the known translations); the first table following such a heading is read
    row by row.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each holding a list of {"domain", "hash", "reason"} dicts, or an empty
        dict when the page could not be fetched or parsed.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Maps localized / legacy headings onto the canonical English ones above.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "שרתים מושעים": "Suspended servers",
        "מדיה מסוננת": "Filtered media",
        "שרתים מוגבלים": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or broken instance simply yields nothing.
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue

        # find_all_next (rather than find_next_siblings) also finds tables
        # that an instance hides inside e.g. a dropdown element.
        tables = header.find_all_next("table")
        if not tables:
            continue

        # The first row is skipped as the table header.
        for line in tables[0].find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")
            # Skip malformed rows instead of discarding the whole instance.
            if span is None or not span.has_attr("title") or len(cells) < 2:
                continue
            blocks[header_text].append(
                {
                    "domain": span.text,
                    # Drops the first nine characters of the title attribute
                    # (presumably a "SHA-256: " style prefix - verify).
                    "hash": span["title"][9:],
                    "reason": cells[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "followers_only": blocks["Limited servers"]
        + blocks["Silenced servers"],
    }
93
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the block list from a Friendica instance's /friendica page.

    Args:
        domain: Host name of the instance to query.

    Returns:
        {"reject": [...]} where each entry is a {"domain", "reason"} dict.
        The list is empty when the page shows no block list; an empty dict is
        returned when the page could not be fetched or parsed.
    """
    blocks = []

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or broken instance simply yields nothing.
        return {}

    blocklist = doc.find(id="about_blocklist")
    table = blocklist.find("table") if blocklist is not None else None
    if table is not None:
        # The first row is skipped as the table header.
        for line in table.find_all("tr")[1:]:
            cells = line.find_all("td")
            if len(cells) < 2:
                # Malformed row; ignore it rather than failing the instance.
                continue
            blocks.append(
                {
                    "domain": cells[0].text.strip(),
                    "reason": cells[1].text.strip(),
                }
            )

    return {
        "reject": blocks
    }
117
def _fetch_misskey_instances(domain: str, filter_key: str, flag_key: str, step: int = 99) -> list:
    """Page through /api/federation/instances and collect the flagged hosts.

    Args:
        domain: Host name of the Misskey(-like) instance to query.
        filter_key: Request filter to enable ("suspended" or "blocked").
        flag_key: Field of each returned record that must be truthy for the
            record to be kept ("isSuspended" or "isBlocked").
        step: Page size requested per call.

    Returns:
        A list of {"domain", "reason"} dicts; the reason is always empty
        because the endpoint's records are read without a reason field.
        Collection stops at the first empty page or on any error, returning
        whatever was gathered up to that point.
    """
    found = []
    offset = 0
    while True:
        try:
            query = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
            if offset > 0:
                query["offset"] = offset
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(query),
                headers=headers,
                timeout=5,
            ).json()
            if not isinstance(page, list) or not page:
                break
            for instance in page:
                # Double-check the flag in case the server ignored the filter.
                if instance[flag_key]:
                    found.append(
                        {
                            "domain": instance["host"],
                            "reason": ""
                        }
                    )
            # Advance by the number of records actually received, so that no
            # record is fetched twice (the previous "counter - 1" offset
            # re-read the last entry of the first page).
            offset += len(page)
        except Exception:
            # Best effort: keep what has been collected so far.
            break
    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Fetch the block lists of a Misskey(-like) instance through its API.

    The API does not return everything at once, so both lists are retrieved
    page by page.

    Args:
        domain: Host name of the instance to query.

    Returns:
        {"reject": [...], "followers_only": [...]}: "reject" holds the hosts
        flagged as blocked, "followers_only" those flagged as suspended. Each
        entry is a {"domain", "reason"} dict with an empty reason.
    """
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")
    return {
        "reject": blocked,
        "followers_only": suspended
    }
180
def get_hash(domain: str) -> str:
    """Return the hex-encoded SHA-256 digest of the UTF-8 encoded domain."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
183
184
def get_type(domain: str) -> str:
    """Detect which server software an instance runs.

    Several nodeinfo URLs are tried in turn; when nodeinfo is answered with
    404 throughout, a successful /api/v1/instance request is taken as a sign
    of Mastodon.

    Args:
        domain: Host name of the instance to probe.

    Returns:
        The software name reported by nodeinfo, with known forks folded into
        "pleroma", "mastodon" or "misskey"; None when the software could not
        be determined or any request/parse step failed.
    """
    # Forks that are treated like the software they derive from.
    aliases = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }

    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        # An HTML answer is not nodeinfo; retry the extension-less 2.1 URL.
        if res.ok and "text/html" in res.headers["content-type"]:
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # Parse the body once instead of once per comparison.
            software = res.json()["software"]["name"]
            return aliases.get(software, software)

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
            if res.ok:
                return "mastodon"
    except Exception:
        # Network errors, invalid JSON or unexpected payloads all mean "unknown".
        return None
    return None
209
def tidyup(domain: str) -> str:
    """Normalize a block-list entry to a bare, lower-case domain name.

    Block lists are written by hand and frequently contain more than the
    host name, so the following parts are removed when present: a leading
    "http:"/"https:" scheme (with or without slashes), one trailing slash,
    a trailing ":port", a leading "@", and a "user@" prefix.

    Args:
        domain: Raw entry as found in a block list.

    Returns:
        The cleaned-up domain; may be an empty string.
    """
    # Entries appear in arbitrary letter case.
    domain = domain.lower()
    # Scheme, sometimes written without the slashes.
    domain = re.sub(r"^https?:/*", "", domain)
    # Trailing slash.
    domain = re.sub(r"/$", "", domain)
    # Port number; removed after the trailing slash so that
    # "https://host:8080/" is handled as well.
    domain = re.sub(r":\d+$", "", domain)
    # Leading "@".
    domain = re.sub(r"^@", "", domain)
    # Entries that name an individual user ("user@host").
    domain = re.sub(r".+@", "", domain)
    return domain
224
def _resolve_obscured(cursor, blocked: str, wildcard: str = "*") -> str:
    """Guess the real domain behind an entry obscured with wildcard characters.

    Every wildcard is turned into the SQL LIKE single-character placeholder
    and the first matching row (by rowid) of the instances table is used.

    Returns:
        The matching known domain, or `blocked` unchanged when none matches.
    """
    cursor.execute(
        "select domain from instances where domain like ? order by rowid limit 1",
        (blocked.replace(wildcard, "_"),),
    )
    row = cursor.fetchone()
    return blocked if row is None else row[0]


def _ensure_instance(cursor, domain: str) -> None:
    """Insert `domain` into the instances table unless it is already there."""
    cursor.execute("select domain from instances where domain = ?", (domain,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (domain, get_hash(domain), get_type(domain)),
        )


def _record_block(cursor, blocker: str, blocked: str, block_level: str, reason="") -> bool:
    """Insert a block row, or refresh last_seen when the row already exists.

    Returns:
        True when a new row was inserted, False when an existing one was updated.
    """
    timestamp = int(time())
    cursor.execute(
        "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
        (blocker, blocked, block_level),
    )
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into blocks select ?, ?, ?, ?, ?, ?",
            (blocker, blocked, reason, block_level, timestamp, timestamp),
        )
        return True
    cursor.execute(
        "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
        (timestamp, blocker, blocked, block_level),
    )
    return False


def _update_reason(cursor, blocker: str, blocked: str, block_level: str, reason) -> None:
    """Store `reason` on a block row that does not have one yet."""
    cursor.execute(
        "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
        (reason, blocker, blocked, block_level),
    )


def _set_pending_reason(blockdict: list, blocked: str, reason) -> None:
    """Attach `reason` to the entries for `blocked` queued for the bot post."""
    for entry in blockdict:
        if entry["blocked"] == blocked:
            entry["reason"] = reason


def _process_pleroma(conn, cursor, blocker: str, blockdict: list) -> None:
    """Record the blocks (and then their reasons) published in Pleroma's nodeinfo."""
    federation = get(
        f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
    ).json()["metadata"]["federation"]

    # Blocks
    if "mrf_simple" in federation:
        levels = {
            **federation["mrf_simple"],
            # Optional, like its "_info" counterpart below; a missing key used
            # to abort the whole instance with a KeyError.
            "quarantined_instances": federation.get("quarantined_instances", []),
        }
        for block_level, blocks in levels.items():
            for blocked in blocks:
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Domains may be obscured without a hash being provided,
                    # so the real name has to be guessed from known instances.
                    blocked = _resolve_obscured(cursor, blocked)
                _ensure_instance(cursor, blocked)
                if _record_block(cursor, blocker, blocked, block_level) and block_level == "reject":
                    blockdict.append(
                        {
                            "blocked": blocked,
                            "reason": None
                        })
    conn.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        infos = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }
        for block_level, info in infos.items():
            for blocked, reason in info.items():
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Same domain guess as above, but for the reasons field.
                    blocked = _resolve_obscured(cursor, blocked)
                _update_reason(cursor, blocker, blocked, block_level, reason["reason"])
                _set_pending_reason(blockdict, blocked, reason["reason"])
    conn.commit()


def _fetch_mastodon_blocklist(blocker: str) -> dict:
    """Fetch Mastodon blocks from the JSON API, falling back to HTML scraping."""
    severity_levels = {
        "suspend": "reject",
        "silence": "followers_only",
        "reject_media": "media_removal",
        "reject_reports": "report_removal",
    }
    try:
        blocklist = {
            "reject": [],
            "media_removal": [],
            "followers_only": [],
            "report_removal": []
        }

        # At least one server has been seen requiring a CSRF token to access
        # the endpoint, so pick it up from the about page when it is offered.
        meta = BeautifulSoup(
            get(f"https://{blocker}/about", headers=headers, timeout=5).text,
            "html.parser",
        )
        try:
            csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
            reqheaders = {**headers, "x-csrf-token": csrf}
        except (TypeError, KeyError):
            # No csrf-token meta tag (or no content attribute) on the page.
            reqheaders = headers

        blocks = get(
            f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
        ).json()
        for block in blocks:
            entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
            block_level = severity_levels.get(block['severity'])
            if block_level is not None:
                blocklist[block_level].append(entry)
        return blocklist
    except Exception:
        # JSON endpoint missing or unusable: scrape the about page instead.
        return get_mastodon_blocks(blocker)


def _process_mastodon(conn, cursor, blocker: str, blockdict: list) -> None:
    """Record the blocks published by a Mastodon instance."""
    for block_level, blocks in _fetch_mastodon_blocklist(blocker).items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            blocked_hash = instance["hash"]
            reason = instance["reason"]

            if blocked.count("*") <= 1:
                cursor.execute(
                    "select hash from instances where hash = ?", (blocked_hash,)
                )
                if cursor.fetchone() is None:
                    cursor.execute(
                        "insert into instances select ?, ?, ?",
                        (blocked, get_hash(blocked), get_type(blocked)),
                    )
            else:
                # Obscured name: look the real one up through its hash.
                cursor.execute(
                    "select domain from instances where hash = ?", (blocked_hash,)
                )
                searchres = cursor.fetchone()
                if searchres is not None:
                    blocked = searchres[0]

            # Entries that are still obscured are stored under their hash.
            stored_name = blocked if blocked.count("*") <= 1 else blocked_hash
            if _record_block(cursor, blocker, stored_name, block_level, reason) and block_level == "reject":
                blockdict.append(
                    {
                        "blocked": blocked,
                        "reason": reason
                    })
            if reason != '':
                _update_reason(cursor, blocker, stored_name, block_level, reason)
    conn.commit()


def _process_domain_reason_list(conn, cursor, blocker: str, blockdict: list, blocklist: dict) -> None:
    """Record a block list whose entries are plain {"domain", "reason"} dicts."""
    for block_level, blocks in blocklist.items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            reason = instance["reason"]

            if blocked.count("*") > 0:
                # Some Friendica servers also obscure domains without a hash.
                blocked = _resolve_obscured(cursor, blocked, "*")
            if blocked.count("?") > 0:
                # Some obscure them with question marks instead.
                blocked = _resolve_obscured(cursor, blocked, "?")

            _ensure_instance(cursor, blocked)

            # The existence check includes block_level (it previously did not
            # here), so a domain listed under two levels gets a row for each.
            if _record_block(cursor, blocker, blocked, block_level, reason) and block_level == "reject":
                blockdict.append(
                    {
                        "blocked": blocked,
                        "reason": reason
                    })
            if reason != '':
                _update_reason(cursor, blocker, blocked, block_level, reason)
    conn.commit()


def _process_friendica(conn, cursor, blocker: str, blockdict: list) -> None:
    """Record the blocks published by a Friendica instance."""
    _process_domain_reason_list(conn, cursor, blocker, blockdict, get_friendica_blocks(blocker))


def _process_misskey(conn, cursor, blocker: str, blockdict: list) -> None:
    """Record the blocks published by a Misskey(-like) instance."""
    _process_domain_reason_list(conn, cursor, blocker, blockdict, get_pisskey_blocks(blocker))


def _process_gotosocial(conn, cursor, blocker: str, blockdict: list) -> None:
    """Record the suspended peers published by a GoToSocial instance."""
    federation = get(
        f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
    ).json()
    for peer in federation:
        blocked = peer["domain"].lower()

        if blocked.count("*") > 0:
            # No hashes are provided for obscured domains, so guess the name.
            blocked = _resolve_obscured(cursor, blocked)

        _ensure_instance(cursor, blocked)
        if _record_block(cursor, blocker, blocked, "reject"):
            blockdict.append(
                {
                    "blocked": blocked,
                    "reason": None
                })
        if "public_comment" in peer:
            reason = peer["public_comment"]
            _update_reason(cursor, blocker, blocked, "reject", reason)
            _set_pending_reason(blockdict, blocked, reason)
    conn.commit()


# Maps the software name stored in the instances table to its processor.
_PROCESSORS = {
    "pleroma": _process_pleroma,
    "mastodon": _process_mastodon,
    "friendica": _process_friendica,
    "misskey": _process_misskey,
    "gotosocial": _process_gotosocial,
}

conn = sqlite3.connect("blocks.db")
c = conn.cursor()

try:
    c.execute(
        # NOTE(review): the active query is pinned to a single instance, which
        # looks like a debugging leftover; the general query is kept here:
        #"select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
        "select domain, software from instances where domain = 'mstdn.social'"
    )

    for blocker, software in c.fetchall():
        # Newly inserted "reject" blocks of this blocker, announced by the bot.
        blockdict = []
        blocker = tidyup(blocker)

        process = _PROCESSORS.get(software)
        if process is not None:
            print(blocker)
            try:
                process(conn, c, blocker, blockdict)
            except Exception as e:
                # One failing instance must not stop the whole run.
                print("error:", e, blocker)

        if config["bot_enabled"] and len(blockdict) > 0:
            try:
                send_bot_post(blocker, blockdict)
            except Exception as e:
                # A bot outage must not abort the remaining instances.
                print("error:", e, blocker)
finally:
    conn.close()