# git.mxchange.org Git - fba.git/blob - fetch_blocks.py
# Remove debug code
# [fba.git] / fetch_blocks.py
1 from reqto import get
2 from reqto import post
3 from hashlib import sha256
4 import sqlite3
5 from bs4 import BeautifulSoup
6 from json import dumps
7 from json import loads
8 import re
9 from time import time
10 import itertools
11
# Runtime settings (user agent, bot credentials, ...) are read from
# config.json in the current working directory. JSON text is UTF-8, so the
# encoding is pinned instead of depending on the platform default.
with open("config.json", encoding="utf-8") as f:
    config = loads(f.read())

# Default HTTP headers sent with every outgoing request.
headers = {
    "user-agent": config["useragent"]
}
18
def send_bot_post(instance: str, blocks: list):
    """Announce newly discovered blocks of *instance* through the bot account.

    Args:
        instance: Domain of the blocking instance.
        blocks: List of dicts with the keys ``"blocked"`` (domain) and
            ``"reason"`` (text; ``None`` or ``""`` when unspecified).

    Returns:
        Always ``True``. The decoded API response is printed, not returned.
    """
    max_entries = 20
    truncated = len(blocks) > max_entries

    parts = [instance + " has blocked the following instances:\n\n"]
    # Slice to exactly max_entries so the truncation notice below is truthful.
    for block in blocks[:max_entries]:
        if block["reason"] is None or block["reason"] == "":
            parts.append(block["blocked"] + " with unspecified reason\n")
        else:
            parts.append(block["blocked"] + ' for "' + block["reason"] + '"\n')
    if truncated:
        parts.append(f"(the list has been truncated to the first {max_entries} entries)")
    message = "".join(parts)

    botheaders = {**headers, "Authorization": "Bearer " + config["bot_token"]}
    req = post(
        f"{config['bot_instance']}/api/v1/statuses",
        data={
            "status": message,
            "visibility": config["bot_visibility"],
            "content_type": "text/plain",
        },
        headers=botheaders,
        timeout=10,
    ).json()
    print(req)
    return True
39
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the published block lists from ``https://<domain>/about/more``.

    Every ``<h3>`` heading whose text (after translation to the English
    section names) is one of the known sections is followed to the next
    ``<table>`` in the document; each row after the header row becomes one
    entry with the keys ``"domain"``, ``"hash"`` and ``"reason"`` (in that
    order).

    Args:
        domain: Host name of the instance to scrape.

    Returns:
        ``{"reject": [...], "media_removal": [...], "followers_only": [...]}``
        or ``{}`` when the page could not be fetched or parsed.

    Raises:
        IndexError, AttributeError, TypeError, KeyError: when a matching
            heading is not followed by a table of the expected layout.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Localized / older section headings mapped to the keys of `blocks`.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "שרתים מושעים": "Suspended servers",
        "מדיה מסוננת": "Filtered media",
        "שרתים מוגבלים": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable instance simply yields no blocks.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue
        # find_all_next (not find_next_siblings) to account for instances
        # that e.g. hide the lists in a dropdown menu
        table = header.find_all_next("table")[0]
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            blocks[header_text].append(
                {
                    "domain": span.text,
                    # drops a 9-character prefix of the title attribute
                    # (presumably "SHA-256: " - TODO confirm)
                    "hash": span["title"][9:],
                    "reason": line.find_all("td")[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "followers_only": blocks["Limited servers"]
        + blocks["Silenced servers"],
    }
93
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the block list from ``https://<domain>/friendica``.

    The list is read from the first ``<table>`` inside the element with the
    id ``about_blocklist``; the first row is skipped as the header row.

    Args:
        domain: Host name of the instance to scrape.

    Returns:
        ``{"reject": [{"domain": ..., "reason": ...}, ...]}``, or ``{}`` when
        the page could not be fetched or does not contain a block list.

    Raises:
        IndexError: when a table row has fewer than two cells.
    """
    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable instance simply yields no blocks.
        return {}

    blocklist = doc.find(id="about_blocklist")
    table = blocklist.find("table") if blocklist is not None else None
    if table is None:
        # The page has no block list section/table, so there is nothing
        # to report (previously this ended in an AttributeError).
        return {}

    blocks = []
    for line in table.find_all("tr")[1:]:
        cells = line.find_all("td")
        blocks.append(
            {
                "domain": cells[0].text.strip(),
                "reason": cells[1].text.strip(),
            }
        )

    return {
        "reject": blocks
    }
117
def _fetch_misskey_instances(domain: str, filter_name: str, flag_name: str, step: int = 99) -> list:
    """Page through ``/api/federation/instances`` of *domain*.

    Args:
        domain: Host name of the instance to query.
        filter_name: Request filter that is set to ``True``
            (``"suspended"`` or ``"blocked"``).
        flag_name: Field of each returned item that must be truthy for the
            item to be kept (``"isSuspended"`` or ``"isBlocked"``).
        step: Page size sent as ``limit``.

    Returns:
        List of ``{"domain": host, "reason": ""}`` dicts. Paging stops at the
        first empty page, non-list response or error; whatever was collected
        up to that point is returned.
    """
    found = []
    offset = 0
    while True:
        payload = {"sort": "+caughtAt", "host": None, filter_name: True, "limit": step}
        if offset > 0:
            payload["offset"] = offset
        try:
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(payload),
                headers=headers,
                timeout=5,
            ).json()
            if not isinstance(page, list) or not page:
                # Empty page: end of the listing. Anything that is not a
                # list (e.g. an error object) cannot be paged through.
                break
            for instance in page:
                # the filter should already guarantee this; checked anyway
                if instance[flag_name]:
                    found.append(
                        {
                            "domain": instance["host"],
                            # the endpoint provides no reason field
                            "reason": "",
                        }
                    )
        except Exception:
            # Best effort: keep what has been collected so far.
            break
        # Continue right after the last item received, so that no entry is
        # fetched twice and none is skipped if the server returns fewer
        # items than requested.
        offset += len(page)
    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Collect the federation restrictions of a Misskey-style instance.

    The API does not return the complete list at once, so both listings are
    fetched page by page.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{"reject": [...], "followers_only": [...]}`` where ``"reject"``
        holds the instances listed with ``blocked`` and ``"followers_only"``
        those listed with ``suspended``. Either list may be empty or partial
        if requests failed.
    """
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")
    return {
        "reject": blocked,
        "followers_only": suspended
    }
180
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of *domain* encoded as UTF-8."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
183
184
def get_type(domain: str) -> str:
    """Determine which server software *domain* runs.

    Several nodeinfo URLs are tried in turn. Names of known forks are mapped
    to the software whose block list format this script handles for them.
    If nodeinfo answers 404 but ``/api/v1/instance`` responds successfully,
    the instance is taken to be Mastodon.

    Args:
        domain: Host name of the instance to probe.

    Returns:
        The (normalized) software name, or ``None`` when it could not be
        determined or any request/parsing step failed.
    """
    aliases = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }

    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        if res.ok and "text/html" in res.headers["content-type"]:
            # an HTML page was served instead of JSON; try the extension-less URL
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # parse the document once instead of once per comparison
            name = res.json()["software"]["name"]
            return aliases.get(name, name)

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
            if res.ok:
                return "mastodon"
    except Exception:
        # Best effort: unreachable or malformed instances have no known type.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        return None
    return None
209
def tidyup(domain: str) -> str:
    """Normalize a domain as found in a published block list.

    Block lists contain entries in mixed case, with scheme, port, trailing
    slash, a leading ``@`` or even a complete ``user@host`` handle. All of
    that is reduced to the bare lower-case host name.

    Args:
        domain: Raw block list entry.

    Returns:
        The cleaned-up domain; may be an empty string.
    """
    # surrounding whitespace and variable case
    domain = domain.strip().lower()
    # scheme, sometimes even written without slashes
    domain = re.sub(r"^https?:/*", "", domain)
    # trailing slash; must go before the port so "host:8080/" loses both
    domain = re.sub(r"/$", "", domain)
    # port
    domain = re.sub(r":\d+$", "", domain)
    # leading @
    domain = re.sub(r"^@", "", domain)
    # entries that name an individual user: keep only the host part
    domain = re.sub(r".+@", "", domain)
    return domain
224
def _resolve_obscured(cursor: sqlite3.Cursor, blocked: str, wildcard: str) -> str:
    """Guess the real domain behind an obscured entry such as ``ex*mple.c*m``.

    Every *wildcard* character is matched as "any single character" (SQL
    ``LIKE`` with ``_``) against the known instances; the match with the
    lowest rowid wins. *blocked* is returned unchanged if nothing matches.
    """
    cursor.execute(
        "select domain from instances where domain like ? order by rowid limit 1",
        (blocked.replace(wildcard, "_"),),
    )
    row = cursor.fetchone()
    return blocked if row is None else row[0]


def _ensure_instance(cursor: sqlite3.Cursor, domain: str) -> None:
    """Insert *domain* into ``instances`` unless a row with that domain exists."""
    cursor.execute("select domain from instances where domain = ?", (domain,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (domain, get_hash(domain), get_type(domain)),
        )


def _record_block(cursor: sqlite3.Cursor, blocker: str, blocked: str, block_level: str, reason) -> bool:
    """Insert the block or, if it is already known, refresh its ``last_seen``.

    Returns:
        ``True`` when a new row was inserted, ``False`` when an existing row
        was updated.
    """
    timestamp = int(time())
    cursor.execute(
        "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
        (blocker, blocked, block_level),
    )
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into blocks select ?, ?, ?, ?, ?, ?",
            (blocker, blocked, reason, block_level, timestamp, timestamp),
        )
        return True
    cursor.execute(
        "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
        (timestamp, blocker, blocked, block_level),
    )
    return False


def _fill_reason(cursor: sqlite3.Cursor, blocker: str, blocked: str, block_level: str, reason) -> None:
    """Store *reason* for the block, but only if no reason is stored yet."""
    cursor.execute(
        "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
        (reason, blocker, blocked, block_level),
    )


conn = sqlite3.connect("blocks.db")
c = conn.cursor()

c.execute(
    "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
)

# Crawl every known instance whose software publishes its block list. Each
# instance is handled best-effort: an error is printed and the crawl goes on
# with the next instance. `blockdict` collects the newly seen "reject" blocks
# of the current instance for the optional bot announcement.
for blocker, software in c.fetchall():
    blockdict = []
    blocker = tidyup(blocker)
    if software == "pleroma":
        print(blocker)
        try:
            # Blocks
            federation = get(
                f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
            ).json()["metadata"]["federation"]
            if "mrf_simple" in federation:
                levels = {
                    **federation["mrf_simple"],
                    # not every instance publishes this key; its absence
                    # must not abort processing of the whole instance
                    "quarantined_instances": federation.get("quarantined_instances", []),
                }
                for block_level, blocks in levels.items():
                    for blocked in blocks:
                        blocked = tidyup(blocked)
                        if blocked == "":
                            continue
                        if blocked.count("*") > 1:
                            # obscured domain without a hash: guess it
                            blocked = _resolve_obscured(c, blocked, "*")

                        _ensure_instance(c, blocked)
                        is_new = _record_block(c, blocker, blocked, block_level, "")
                        if is_new and block_level == "reject":
                            blockdict.append(
                                {
                                    "blocked": blocked,
                                    "reason": None
                                })
            conn.commit()
            # Reasons
            if "mrf_simple_info" in federation:
                infos = {
                    **federation["mrf_simple_info"],
                    **federation.get("quarantined_instances_info", {}),
                }
                for block_level, info in infos.items():
                    for blocked, reason in info.items():
                        blocked = tidyup(blocked)
                        if blocked == "":
                            continue
                        if blocked.count("*") > 1:
                            # same domain guess as above, but for the reasons
                            blocked = _resolve_obscured(c, blocked, "*")
                        _fill_reason(c, blocker, blocked, block_level, reason["reason"])
                        for entry in blockdict:
                            if entry["blocked"] == blocked:
                                entry["reason"] = reason["reason"]

            conn.commit()
        except Exception as e:
            print("error:", e, blocker)
    elif software == "mastodon":
        print(blocker)
        try:
            # JSON endpoint of newer Mastodon versions; any failure falls
            # back to scraping the about page.
            try:
                blocklists = {
                    "reject": [],
                    "media_removal": [],
                    "followers_only": [],
                    "report_removal": []
                }
                severity_levels = {
                    "suspend": "reject",
                    "silence": "followers_only",
                    "reject_media": "media_removal",
                    "reject_reports": "report_removal",
                }

                # at least one server requires a CSRF token to access the endpoint
                meta = BeautifulSoup(
                    get(f"https://{blocker}/about", headers=headers, timeout=5).text,
                    "html.parser",
                )
                try:
                    csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
                    reqheaders = {**headers, "x-csrf-token": csrf}
                except Exception:
                    reqheaders = headers

                blocks = get(
                    f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
                ).json()
                for block in blocks:
                    entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
                    block_level = severity_levels.get(block['severity'])
                    if block_level is not None:
                        blocklists[block_level].append(entry)
            except Exception:
                blocklists = get_mastodon_blocks(blocker)

            for block_level, blocks in blocklists.items():
                for instance in blocks:
                    # read by key rather than relying on the dict's value order
                    blocked = tidyup(instance["domain"])
                    blocked_hash = instance["hash"]
                    reason = instance["reason"]
                    if blocked.count("*") <= 1:
                        c.execute(
                            "select hash from instances where hash = ?", (blocked_hash,)
                        )
                        if c.fetchone() is None:
                            c.execute(
                                "insert into instances select ?, ?, ?",
                                (blocked, get_hash(blocked), get_type(blocked)),
                            )
                    else:
                        # obscured domain: look the real name up by its hash
                        c.execute(
                            "select domain from instances where hash = ?", (blocked_hash,)
                        )
                        searchres = c.fetchone()
                        if searchres is not None:
                            blocked = searchres[0]

                    # a domain that is still obscured is stored under its hash
                    block_key = blocked if blocked.count("*") <= 1 else blocked_hash
                    is_new = _record_block(c, blocker, block_key, block_level, reason)
                    if is_new and block_level == "reject":
                        blockdict.append(
                            {
                                "blocked": blocked,
                                "reason": reason
                            })
                    if reason != '':
                        _fill_reason(c, blocker, block_key, block_level, reason)
            conn.commit()
        except Exception as e:
            print("error:", e, blocker)
    elif software == "friendica" or software == "misskey":
        print(blocker)
        try:
            if software == "friendica":
                blocklists = get_friendica_blocks(blocker)
            else:
                blocklists = get_pisskey_blocks(blocker)
            for block_level, blocks in blocklists.items():
                for instance in blocks:
                    blocked = tidyup(instance["domain"])
                    reason = instance["reason"]

                    if blocked.count("*") > 0:
                        # some Friendica servers also obscure domains without hash
                        blocked = _resolve_obscured(c, blocked, "*")

                    if blocked.count("?") > 0:
                        # some obscure them with question marks instead
                        blocked = _resolve_obscured(c, blocked, "?")

                    _ensure_instance(c, blocked)

                    # The lookup includes block_level (as for every other
                    # software), so that a domain listed under two levels
                    # gets a row for each of them.
                    is_new = _record_block(c, blocker, blocked, block_level, reason)
                    if is_new and block_level == "reject":
                        blockdict.append(
                            {
                                "blocked": blocked,
                                "reason": reason
                            })
                    if reason != '':
                        _fill_reason(c, blocker, blocked, block_level, reason)
            conn.commit()
        except Exception as e:
            print("error:", e, blocker)
    elif software == "gotosocial":
        print(blocker)
        try:
            # Blocks
            federation = get(
                f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
            ).json()
            for peer in federation:
                blocked = peer["domain"].lower()

                if blocked.count("*") > 0:
                    # no hashes for obscured domains here, so guess it
                    blocked = _resolve_obscured(c, blocked, "*")

                _ensure_instance(c, blocked)
                if _record_block(c, blocker, blocked, "reject", ""):
                    blockdict.append(
                        {
                            "blocked": blocked,
                            "reason": None
                        })
                if "public_comment" in peer:
                    reason = peer["public_comment"]
                    _fill_reason(c, blocker, blocked, "reject", reason)
                    for entry in blockdict:
                        if entry["blocked"] == blocked:
                            entry["reason"] = reason
            conn.commit()
        except Exception as e:
            print("error:", e, blocker)

    if config["bot_enabled"] and blockdict:
        send_bot_post(blocker, blockdict)

conn.close()