git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Remove even more debug code
[fba.git] / fetch_blocks.py
1 from reqto import get
2 from reqto import post
3 from hashlib import sha256
4 import sqlite3
5 from bs4 import BeautifulSoup
6 from json import dumps
7 from json import loads
8 import re
9 from time import time
10 import itertools
11
# Runtime settings are read once at start-up from config.json in the current
# working directory. Keys used by this script: "useragent", "bot_enabled",
# "bot_token", "bot_instance" and "bot_visibility".
with open("config.json") as config_file:
    config = loads(config_file.read())

# Default HTTP headers sent with every outgoing request.
headers = {"user-agent": config["useragent"]}
18
def send_bot_post(instance: str, blocks: list):
    """Announce newly recorded blocks of *instance* through the bot account.

    Args:
        instance: Domain of the instance that issued the blocks.
        blocks: List of dicts, each with the keys ``"blocked"`` (the blocked
            domain) and ``"reason"`` (reason text; ``None`` or ``""`` when no
            reason is known).

    Returns:
        Always ``True``.

    The status is posted to ``config["bot_instance"]`` using
    ``config["bot_token"]`` and ``config["bot_visibility"]``.  Network errors
    raised by the request are not caught here.
    """
    max_entries = 20
    truncated = len(blocks) > max_entries

    parts = [instance + " has blocked the following instances:\n\n"]
    # Slice to exactly ``max_entries`` so the truncation notice below is true
    # (the previous ``[0 : 19]`` slice silently dropped the 20th entry).
    for block in blocks[:max_entries]:
        reason = block["reason"]
        if not reason:
            parts.append(block["blocked"] + " with unspecified reason\n")
        else:
            parts.append(block["blocked"] + ' for "' + reason + '"\n')
    if truncated:
        parts.append("(the list has been truncated to the first 20 entries)")
    message = "".join(parts)

    botheaders = {**headers, "Authorization": "Bearer " + config["bot_token"]}
    # The response body is parsed as before, but its content is not used.
    post(
        f"{config['bot_instance']}/api/v1/statuses",
        data={"status": message, "visibility": config['bot_visibility'], "content_type": "text/plain"},
        headers=botheaders,
        timeout=10,
    ).json()
    return True
38
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the block lists from a Mastodon instance's ``/about/more`` page.

    Args:
        domain: Host name of the Mastodon instance.

    Returns:
        A dict with the keys ``"reject"``, ``"media_removal"`` and
        ``"followers_only"``; each value is a list of dicts with the keys
        ``"domain"``, ``"hash"`` and ``"reason"`` (in that order).  An empty
        dict is returned when the page cannot be fetched or parsed.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Section headings as rendered by older versions / other locales, mapped
    # to the English headings used as keys of ``blocks``.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "שרתים מושעים": "Suspended servers",
        "מדיה מסוננת": "Filtered media",
        "שרתים מוגבלים": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        page = get(f"https://{domain}/about/more", headers=headers, timeout=5).text
        doc = BeautifulSoup(page, "html.parser")
    except Exception:
        # Best effort: an unreachable or broken instance yields no blocks.
        # ``Exception`` (not a bare except) keeps Ctrl-C / SystemExit working.
        return {}

    for header in doc.find_all("h3"):
        section = translations.get(header.text, header.text)
        if section not in blocks:
            continue
        # Search everything after the heading rather than only its siblings,
        # to account for instances that e.g. hide lists in a dropdown menu.
        table = header.find_next("table")
        if table is None:
            # Heading without a table: nothing to read, but keep going.
            continue
        # The first row is skipped (treated as the table header).
        for row in table.find_all("tr")[1:]:
            label = row.find("span")
            blocks[section].append(
                {
                    "domain": label.text,
                    # Drops the first 9 characters of the title attribute;
                    # presumably a "SHA-256: " prefix - TODO confirm.
                    "hash": label["title"][9:],
                    "reason": row.find_all("td")[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "followers_only": blocks["Limited servers"] + blocks["Silenced servers"],
    }
92
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the block list from a Friendica instance's ``/friendica`` page.

    Args:
        domain: Host name of the Friendica instance.

    Returns:
        ``{"reject": [...]}`` where each list item is a dict with the keys
        ``"domain"`` and ``"reason"`` (in that order).  An empty dict is
        returned when the page cannot be fetched or contains no block list.
    """
    try:
        page = get(f"https://{domain}/friendica", headers=headers, timeout=5).text
        doc = BeautifulSoup(page, "html.parser")
    except Exception:
        # Best effort: an unreachable or broken instance yields no blocks.
        return {}

    blocklist = doc.find(id="about_blocklist")
    table = blocklist.find("table") if blocklist is not None else None
    if table is None:
        # No block list on the page; previously this crashed with an
        # AttributeError on None.
        return {}

    blocks = []
    # The first row is skipped (treated as the table header).
    for row in table.find_all("tr")[1:]:
        cells = row.find_all("td")
        blocks.append(
            {
                "domain": cells[0].text.strip(),
                "reason": cells[1].text.strip(),
            }
        )

    return {
        "reject": blocks
    }
116
def _fetch_misskey_instances(domain: str, filter_key: str, flag_key: str) -> list:
    """Page through ``/api/federation/instances`` and collect flagged hosts.

    Args:
        domain: Host name of the Misskey instance to query.
        filter_key: Request filter to set to ``True`` (``"suspended"`` or
            ``"blocked"``).
        flag_key: Field of each returned instance that must be truthy for the
            instance to be collected (``"isSuspended"`` or ``"isBlocked"``).

    Returns:
        A list of ``{"domain": host, "reason": ""}`` dicts.  Paging stops at
        the first empty page or the first error; whatever was collected up to
        that point is returned.
    """
    found = []
    step = 99
    offset = 0
    while True:
        query = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
        if offset > 0:
            # Skip exactly the rows already fetched.  The previous
            # ``counter - 1`` offset re-fetched the last row of every page.
            query["offset"] = offset
        try:
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(query),
                headers=headers,
                timeout=5,
            ).json()
            if not page:
                break
            for instance in page:
                # Double-check the flag in case the filter was ignored.
                if instance[flag_key]:
                    found.append(
                        {
                            "domain": instance["host"],
                            # The API response provides no reason field.
                            "reason": ""
                        }
                    )
        except Exception:
            # Best effort: network, JSON or schema errors end the paging.
            break
        offset += step
    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Fetch the block lists of a Misskey instance through its API.

    The lists cannot be requested in one go, so they are fetched page by page.

    Args:
        domain: Host name of the Misskey instance.

    Returns:
        A dict with the keys ``"reject"`` (instances reported as blocked) and
        ``"followers_only"`` (instances reported as suspended); each value is
        a list of dicts with the keys ``"domain"`` and ``"reason"``.
    """
    # Same request order as before: suspended first, then blocked.
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")
    return {
        "reject": blocked,
        "followers_only": suspended
    }
179
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoded *domain*."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
182
183
def get_type(domain: str) -> str:
    """Determine which server software *domain* runs.

    The nodeinfo documents are tried in the order ``2.1.json``, ``2.0``,
    ``2.0.json`` and ``2.1``; if nodeinfo is not available (HTTP 404) but
    ``/api/v1/instance`` answers successfully, the instance is treated as
    Mastodon.

    Args:
        domain: Host name of the instance to probe.

    Returns:
        The software name from nodeinfo, with known forks folded into their
        parent project (see ``aliases`` below), ``"mastodon"`` for the API
        fallback, or ``None`` when the type cannot be determined or any
        request fails.
    """
    # Forks that expose the same block-list interface as their parent project.
    aliases = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }

    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        if res.ok and "text/html" in res.headers["content-type"]:
            # An HTML answer is not a nodeinfo document; try the last variant.
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # Parse the body once instead of once per comparison.
            name = res.json()["software"]["name"]
            return aliases.get(name, name)

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
        if res.ok:
            return "mastodon"
    except Exception:
        # Best effort: any network, JSON or schema problem means "unknown".
        return None
    return None
208
def tidyup(domain: str) -> str:
    """Normalise a block-list entry to a bare, lower-case host name.

    Block lists are hand-written and contain entries in many shapes: mixed
    case, with a URL scheme, a port, a trailing slash, a leading ``@`` or
    even a full ``user@host`` handle.  All of these are reduced to the host.

    Args:
        domain: Raw entry as found in a block list.

    Returns:
        The cleaned-up host name (possibly an empty string).
    """
    # Entries come in variable case and may carry surrounding whitespace.
    domain = domain.strip().lower()
    # Trailing slash; removed first so that a port in front of it is still
    # anchored at the end of the string for the next step.
    domain = re.sub(r"/$", "", domain)
    # Port number.
    domain = re.sub(r":\d+$", "", domain)
    # URL scheme, sometimes written without (or with too many) slashes.
    domain = re.sub(r"^https?:/*", "", domain)
    # Leading "@".
    domain = re.sub(r"^@", "", domain)
    # Entries that name an individual user: keep only the host part.
    domain = re.sub(r"(.+)@", "", domain)
    return domain
223
def _guess_obscured_domain(cursor, pattern: str, wildcard: str = "*") -> str:
    """Guess the real domain behind an obscured block-list entry.

    Every *wildcard* character in *pattern* is matched against exactly one
    arbitrary character (SQL ``LIKE`` placeholder ``_``).  The first matching
    row of ``instances`` (by rowid) wins; *pattern* is returned unchanged when
    nothing matches.
    """
    cursor.execute(
        "select domain from instances where domain like ? order by rowid limit 1",
        (pattern.replace(wildcard, "_"),),
    )
    row = cursor.fetchone()
    return pattern if row is None else row[0]


def _ensure_instance(cursor, domain: str) -> None:
    """Add *domain* to ``instances`` (with hash and software type) if missing."""
    cursor.execute("select domain from instances where domain = ?", (domain,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (domain, get_hash(domain), get_type(domain)),
        )


def _record_block(cursor, blocker: str, blocked: str, reason, block_level: str) -> bool:
    """Insert a block row, or refresh ``last_seen`` of the existing one.

    Returns ``True`` when a new row was inserted, ``False`` when the block was
    already known.
    """
    timestamp = int(time())
    cursor.execute(
        "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
        (blocker, blocked, block_level),
    )
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into blocks select ?, ?, ?, ?, ?, ?",
            (blocker, blocked, reason, block_level, timestamp, timestamp),
        )
        return True
    cursor.execute(
        "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
        (timestamp, blocker, blocked, block_level),
    )
    return False


def _fill_reason(cursor, blocker: str, blocked: str, reason, block_level: str) -> None:
    """Store *reason* for a block whose stored reason is still empty."""
    cursor.execute(
        "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
        (reason, blocker, blocked, block_level),
    )


def _set_announced_reason(blockdict: list, blocked: str, reason) -> None:
    """Attach *reason* to every pending announcement entry for *blocked*."""
    for entry in blockdict:
        if entry["blocked"] == blocked:
            entry["reason"] = reason


def _process_pleroma(db, cursor, blocker: str, blockdict: list) -> None:
    """Record the blocks a Pleroma instance publishes in its nodeinfo.

    New ``reject`` blocks are appended to *blockdict* for the bot post.
    """
    federation = get(
        f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
    ).json()["metadata"]["federation"]

    # Blocks
    if "mrf_simple" in federation:
        # "quarantined_instances" is optional; a missing key used to raise a
        # KeyError and thereby skipped the whole instance.
        levels = {
            **federation["mrf_simple"],
            "quarantined_instances": federation.get("quarantined_instances", []),
        }
        for block_level, blocks in levels.items():
            for blocked in blocks:
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Domains may be obscured without a hash being published.
                    blocked = _guess_obscured_domain(cursor, blocked)
                _ensure_instance(cursor, blocked)
                is_new = _record_block(cursor, blocker, blocked, "", block_level)
                if is_new and block_level == "reject":
                    blockdict.append(
                        {
                            "blocked": blocked,
                            "reason": None
                        })
    # Persist the blocks before the reasons are processed, so a failure in
    # the reasons part does not discard them.
    db.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        infos = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }
        for block_level, info in infos.items():
            for blocked, reason in info.items():
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Same domain guess as above, but for the reasons field.
                    blocked = _guess_obscured_domain(cursor, blocked)
                _fill_reason(cursor, blocker, blocked, reason["reason"], block_level)
                _set_announced_reason(blockdict, blocked, reason["reason"])


def _fetch_mastodon_api_blocks(blocker: str) -> dict:
    """Read a Mastodon instance's blocks from its JSON endpoint.

    Raises whatever the requests or the JSON/schema handling raise; the
    caller falls back to HTML scraping in that case.
    """
    levels = {
        "reject": [],
        "media_removal": [],
        "followers_only": [],
        "report_removal": []
    }
    severity_levels = {
        "suspend": "reject",
        "silence": "followers_only",
        "reject_media": "media_removal",
        "reject_reports": "report_removal",
    }

    # Some servers require a CSRF token to access the endpoint.
    meta = BeautifulSoup(
        get(f"https://{blocker}/about", headers=headers, timeout=5).text,
        "html.parser",
    )
    try:
        csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
        reqheaders = {**headers, "x-csrf-token": csrf}
    except (TypeError, KeyError):
        # No such meta tag, or the tag has no content attribute.
        reqheaders = headers

    blocks = get(
        f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
    ).json()
    for block in blocks:
        entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
        level = severity_levels.get(block['severity'])
        if level is not None:
            levels[level].append(entry)
    return levels


def _process_mastodon(cursor, blocker: str, blockdict: list) -> None:
    """Record the blocks of a Mastodon instance (JSON API, else HTML page).

    New ``reject`` blocks are appended to *blockdict* for the bot post.
    """
    try:
        # JSON endpoint offered by newer Mastodon versions.
        levels = _fetch_mastodon_api_blocks(blocker)
    except Exception:
        levels = get_mastodon_blocks(blocker)

    for block_level, blocks in levels.items():
        for instance in blocks:
            # Access by key instead of relying on the dict's value order.
            blocked = tidyup(instance["domain"])
            blocked_hash = instance["hash"]
            reason = instance["reason"]

            if blocked.count("*") <= 1:
                cursor.execute(
                    "select hash from instances where hash = ?", (blocked_hash,)
                )
                if cursor.fetchone() is None:
                    cursor.execute(
                        "insert into instances select ?, ?, ?",
                        (blocked, get_hash(blocked), get_type(blocked)),
                    )
            else:
                # Obscured name: resolve it through the published hash.
                cursor.execute(
                    "select domain from instances where hash = ?", (blocked_hash,)
                )
                row = cursor.fetchone()
                if row is not None:
                    blocked = row[0]

            # Names that are still obscured are stored by their hash instead.
            stored = blocked if blocked.count("*") <= 1 else blocked_hash
            is_new = _record_block(cursor, blocker, stored, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append(
                    {
                        "blocked": blocked,
                        "reason": reason
                    })
            if reason != '':
                _fill_reason(cursor, blocker, stored, reason, block_level)


def _process_domain_list(cursor, blocker: str, blockdict: list, levels: dict) -> None:
    """Record blocks given as ``{block_level: [{"domain", "reason"}, ...]}``.

    Used for Friendica and Misskey.  New ``reject`` blocks are appended to
    *blockdict* for the bot post.
    """
    for block_level, blocks in levels.items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            reason = instance["reason"]

            if "*" in blocked:
                # Some Friendica servers obscure domains without a hash.
                blocked = _guess_obscured_domain(cursor, blocked)
            if "?" in blocked:
                # Some obscure them with question marks instead.
                blocked = _guess_obscured_domain(cursor, blocked, "?")

            _ensure_instance(cursor, blocked)

            # The existence check includes block_level (it previously did
            # not), so a domain listed under two levels gets a row for each.
            is_new = _record_block(cursor, blocker, blocked, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append(
                    {
                        "blocked": blocked,
                        "reason": reason
                    })
            if reason != '':
                _fill_reason(cursor, blocker, blocked, reason, block_level)


def _process_gotosocial(cursor, blocker: str, blockdict: list) -> None:
    """Record the suspended peers a GoToSocial instance reports.

    Every peer is stored with block level ``reject``; new blocks are appended
    to *blockdict* for the bot post.
    """
    federation = get(
        f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
    ).json()
    for peer in federation:
        blocked = peer["domain"].lower()

        if "*" in blocked:
            # No hashes are provided for obscured domains, so guess the name.
            blocked = _guess_obscured_domain(cursor, blocked)

        _ensure_instance(cursor, blocked)
        if _record_block(cursor, blocker, blocked, "", "reject"):
            blockdict.append(
                {
                    "blocked": blocked,
                    "reason": None
                })
        if "public_comment" in peer:
            reason = peer["public_comment"]
            _fill_reason(cursor, blocker, blocked, reason, "reject")
            _set_announced_reason(blockdict, blocked, reason)


# --- Script body: walk all known instances and record whom they block. ---
conn = sqlite3.connect("blocks.db")
c = conn.cursor()

try:
    c.execute(
        "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
    )

    for blocker, software in c.fetchall():
        # Newly discovered "reject" blocks of this blocker, for the bot post.
        blockdict = []
        blocker = tidyup(blocker)
        print(blocker)
        try:
            if software == "pleroma":
                _process_pleroma(conn, c, blocker, blockdict)
            elif software == "mastodon":
                _process_mastodon(c, blocker, blockdict)
            elif software == "friendica":
                _process_domain_list(c, blocker, blockdict, get_friendica_blocks(blocker))
            elif software == "misskey":
                _process_domain_list(c, blocker, blockdict, get_pisskey_blocks(blocker))
            elif software == "gotosocial":
                _process_gotosocial(c, blocker, blockdict)
            conn.commit()
        except Exception as e:
            # One failing instance must not stop the whole run.
            print("error:", e, blocker)

        # Blocks collected before an error are still announced.
        if config["bot_enabled"] and blockdict:
            send_bot_post(blocker, blockdict)
finally:
    # Close the database even if the run is aborted.
    conn.close()