git.mxchange.org Git - fba.git/blob - fetch_blocks.py
New Masto API support
[fba.git] / fetch_blocks.py
1 from requests import get
2 from requests import post
3 from hashlib import sha256
4 import sqlite3
5 from bs4 import BeautifulSoup
6 from json import dumps
7 import re
8
# Request headers passed to every get()/post() call in this file; the
# user-agent string imitates a desktop Firefox browser.
headers = dict()
headers["user-agent"] = "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0"
12
13
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the public block list from a Mastodon instance's about page.

    Fetches ``https://{domain}/about/more``, looks for ``<h3>`` headings that
    name a block category (in English or one of the known translations) and
    reads the first table that follows each heading.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``reject``, ``media_removal`` and
        ``followers_only``.  Each value is a list of dicts with the keys
        ``domain``, ``hash`` and ``reason`` (in that order).  An empty dict is
        returned when the page cannot be fetched or parsed.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Maps older/localized section headings onto the canonical keys above.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        page = get(f"https://{domain}/about/more", headers=headers, timeout=5).text
        doc = BeautifulSoup(page, "html.parser")
    except Exception:
        # Best effort: an unreachable instance simply yields no blocks.
        # (Not a bare except, so Ctrl-C still interrupts the crawl.)
        return {}

    for header in doc.find_all("h3"):
        section = translations.get(header.text, header.text)
        if section not in blocks:
            continue

        # Search everything after the heading rather than only its siblings,
        # because some instances hide the list inside a dropdown element.
        table = header.find_next("table")
        if table is None:
            continue

        # The first row is skipped (presumably the table header).
        for row in table.find_all("tr")[1:]:
            span = row.find("span")
            cells = row.find_all("td")
            title = span.get("title") if span is not None else None
            if title is None or len(cells) < 2:
                # Skip malformed rows instead of losing the whole instance.
                continue
            blocks[section].append(
                {
                    "domain": span.text,
                    # Drops the first 9 characters of the title attribute;
                    # presumably a "SHA-256: " style prefix - TODO confirm.
                    "hash": title[9:],
                    "reason": cells[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "followers_only": blocks["Limited servers"]
        + blocks["Silenced servers"],
    }
64
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the public block list from a Friendica instance.

    Fetches ``https://{domain}/friendica`` and reads the table inside the
    element with id ``about_blocklist``.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{"reject": [...]}`` where each list item is a dict with the keys
        ``domain`` and ``reason`` (in that order).  An empty dict is returned
        when the page cannot be fetched or does not publish a block list.
    """
    try:
        page = get(f"https://{domain}/friendica", headers=headers, timeout=5).text
        doc = BeautifulSoup(page, "html.parser")
    except Exception:
        # Best effort: an unreachable instance simply yields no blocks.
        return {}

    # Instances without a published block list have no such element; treat
    # that as "no blocks" instead of crashing on None.
    blocklist = doc.find(id="about_blocklist")
    if blocklist is None:
        return {}
    table = blocklist.find("table")
    if table is None:
        return {}

    blocks = []
    # The first row is skipped (presumably the table header).
    for row in table.find_all("tr")[1:]:
        cells = row.find_all("td")
        if len(cells) < 2:
            continue
        blocks.append(
            {
                "domain": cells[0].text.strip(),
                "reason": cells[1].text.strip(),
            }
        )

    return {
        "reject": blocks
    }
88
def _fetch_misskey_instances(domain: str, filter_key: str, flag_key: str) -> list:
    """Page through ``/api/federation/instances`` for one filter.

    Args:
        domain: Host name of the instance to query.
        filter_key: Request field set to ``True`` ("suspended" or "blocked").
        flag_key: Field of each returned instance that must be truthy for the
            instance to be kept ("isSuspended" or "isBlocked").

    Returns:
        A list of ``{"domain": host, "reason": ""}`` dicts.  Whatever was
        collected before a failed request or unexpected response is returned.
    """
    found = []
    step = 99
    offset = 0
    while True:
        # The endpoint is queried page by page with an explicit limit.
        query = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
        if offset > 0:
            # Start exactly where the previous page ended so that no entry
            # is fetched twice.
            query["offset"] = offset
        try:
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(query),
                headers=headers,
                timeout=5,
            ).json()
            if not page:
                # Empty page: everything has been read.
                break
            for instance in page:
                # Double-check the flag, just in case the filter was ignored.
                if instance[flag_key]:
                    found.append(
                        {
                            "domain": instance["host"],
                            # The API response carries no reason field.
                            "reason": "",
                        }
                    )
        except Exception:
            # Network error, invalid JSON or an unexpected payload shape:
            # stop paging and keep what we have.
            break
        offset += step
    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Fetch the federation block lists of a Misskey (or fork) instance.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``reject`` (instances flagged "blocked") and
        ``followers_only`` (instances flagged "suspended").  Each value is a
        list of dicts with the keys ``domain`` and ``reason``; the reason is
        always an empty string.
    """
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")
    return {
        "reject": blocked,
        "followers_only": suspended
    }
151
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoded domain."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
154
155
def get_type(domain: str) -> "str | None":
    """Detect which server software an instance runs.

    Probes the nodeinfo documents of the instance (``/nodeinfo/2.1.json``,
    then ``/nodeinfo/2.0``, ``/nodeinfo/2.0.json`` and ``/nodeinfo/2.1`` as
    fallbacks) and reads ``software.name`` from the result.  Known forks are
    reported under the name of the software they derive from.  If nodeinfo
    answers 404, a successful ``/api/v1/instance`` request is taken to mean
    Mastodon.

    Args:
        domain: Host name of the instance to query.

    Returns:
        The software name, or ``None`` when it cannot be determined (network
        error, invalid JSON, missing fields, or no probe succeeded).
    """
    # Fork name -> name of the software family it is recorded as.
    fork_of = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }

    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        # An HTML answer is not a nodeinfo document; try the extension-less
        # 2.1 path instead.  A missing content-type header is tolerated.
        if res.ok and "text/html" in res.headers.get("content-type", ""):
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # Parse the body once and reuse the result.
            name = res.json()["software"]["name"]
            return fork_of.get(name, name)

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
            if res.ok:
                return "mastodon"
    except Exception:
        # Best effort: any failure means "unknown".  Not a bare except, so
        # Ctrl-C still interrupts the crawl.
        return None

    return None
180
def tidyup(domain: str) -> str:
    """Normalize a block-list entry to a bare, lower-case host name.

    Removes, in this order: a leading ``http:``/``https:`` scheme (with or
    without slashes), trailing slashes, a trailing ``:port`` and everything
    up to and including the last ``@`` (entries that name a single user).

    The trailing slash is removed before the port so that entries such as
    ``https://example.com:8080/`` are fully cleaned.

    Args:
        domain: Raw entry as published by an instance.

    Returns:
        The cleaned host name.
    """
    # Some block lists use mixed case.
    domain = domain.lower()
    # Some include the scheme, sometimes even without slashes.
    domain = re.sub(r"^https?:/*", "", domain)
    # Trailing slash(es) must go before the port can be matched at the end.
    domain = re.sub(r"/+$", "", domain)
    # Some include the port.
    domain = re.sub(r":\d+$", "", domain)
    # Some try to block individual users; keep only the host part.
    domain = re.sub(r".+@", "", domain)
    return domain
193
# ---------------------------------------------------------------------------
# Crawler script: for every known instance running a supported software,
# fetch its published block list and record it in blocks.db.
#
# Tables used (column order inferred from the insert statements below -
# verify against the actual schema):
#   instances(domain, hash, software)
#   blocks(blocker, blocked, reason, block_level)
# ---------------------------------------------------------------------------


def _resolve_obscured(cursor, pattern: str, wildcard: str = "*") -> str:
    """Guess the real domain behind an obscured entry.

    Every ``wildcard`` character is turned into the SQL ``_`` placeholder and
    the first matching known instance (lowest rowid) is returned.  When
    nothing matches, ``pattern`` is returned unchanged.
    """
    cursor.execute(
        "select domain from instances where domain like ? order by rowid limit 1",
        (pattern.replace(wildcard, "_"),),
    )
    row = cursor.fetchone()
    return pattern if row is None else row[0]


def _ensure_instance(cursor, domain: str) -> None:
    """Insert ``domain`` into ``instances`` unless it is already known."""
    cursor.execute("select domain from instances where domain = ?", (domain,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (domain, get_hash(domain), get_type(domain)),
        )


def _ensure_block(cursor, blocker: str, blocked: str, reason, block_level: str) -> None:
    """Insert a block row unless the same blocker/blocked/level row exists."""
    cursor.execute(
        "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
        (blocker, blocked, block_level),
    )
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into blocks select ?, ?, ?, ?",
            (blocker, blocked, reason, block_level),
        )


def _update_pleroma(cursor, connection, blocker: str) -> None:
    """Record the blocks a Pleroma instance publishes in its nodeinfo."""
    federation = get(
        f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
    ).json()["metadata"]["federation"]

    # Blocks
    if "mrf_simple" in federation:
        levels = {
            **federation["mrf_simple"],
            # Not every instance publishes this key; treat it as empty
            # instead of aborting the whole instance.
            "quarantined_instances": federation.get("quarantined_instances") or [],
        }
        for block_level, blocked_domains in levels.items():
            for blocked in blocked_domains:
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Obscured domain without a hash: guess it.
                    blocked = _resolve_obscured(cursor, blocked)
                _ensure_instance(cursor, blocked)
                _ensure_block(cursor, blocker, blocked, "", block_level)
    connection.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        infos = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }
        for block_level, info in infos.items():
            for blocked, reason in info.items():
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Same domain guess as above, so the reason lands on
                    # the row inserted for the guessed domain.
                    blocked = _resolve_obscured(cursor, blocked)
                cursor.execute(
                    "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ?",
                    (reason["reason"], blocker, blocked, block_level),
                )
    connection.commit()


def _fetch_mastodon_levels(blocker: str) -> dict:
    """Return a Mastodon instance's blocks grouped by block level.

    Tries the JSON endpoint ``/api/v1/instance/domain_blocks`` first and falls
    back to scraping the about page when that fails for any reason.
    """
    severity_to_level = {
        "suspend": "reject",
        "silence": "followers_only",
        "reject_media": "media_removal",
        "reject_reports": "report_removal",
    }
    try:
        levels = {
            "reject": [],
            "media_removal": [],
            "followers_only": [],
            "report_removal": [],
        }
        entries = get(
            f"https://{blocker}/api/v1/instance/domain_blocks", headers=headers, timeout=5
        ).json()
        for entry in entries:
            record = {
                "domain": entry["domain"],
                "hash": entry["digest"],
                "reason": entry["comment"],
            }
            level = severity_to_level.get(entry["severity"])
            if level is not None:
                levels[level].append(record)
    except Exception:
        levels = None

    if levels is None:
        # Endpoint missing, disabled or returned something unexpected.
        return get_mastodon_blocks(blocker)
    return levels


def _update_mastodon(cursor, connection, blocker: str) -> None:
    """Record the blocks published by a Mastodon instance."""
    for block_level, entries in _fetch_mastodon_levels(blocker).items():
        for entry in entries:
            blocked = tidyup(entry["domain"])
            blocked_hash = entry["hash"]
            reason = entry["reason"]

            if blocked.count("*") <= 1:
                # Readable domain: make sure it is a known instance.
                cursor.execute(
                    "select hash from instances where hash = ?", (blocked_hash,)
                )
                if cursor.fetchone() is None:
                    cursor.execute(
                        "insert into instances select ?, ?, ?",
                        (blocked, get_hash(blocked), get_type(blocked)),
                    )
            else:
                # Obscured domain: look the real name up by its hash.
                cursor.execute(
                    "select domain from instances where hash = ?", (blocked_hash,)
                )
                row = cursor.fetchone()
                if row is not None:
                    blocked = row[0]

            # Entries that stay obscured are stored under their hash.
            target = blocked if blocked.count("*") <= 1 else blocked_hash
            _ensure_block(cursor, blocker, target, reason, block_level)
    connection.commit()


def _update_listed(cursor, connection, blocker: str, levels: dict) -> None:
    """Record blocks given as ``{block_level: [{"domain", "reason"}, ...]}``."""
    for block_level, entries in levels.items():
        for entry in entries:
            blocked = tidyup(entry["domain"])
            reason = entry["reason"]

            if "*" in blocked:
                # Some Friendica servers obscure domains without a hash.
                blocked = _resolve_obscured(cursor, blocked, "*")
            if "?" in blocked:
                # Some obscure them with question marks instead.
                blocked = _resolve_obscured(cursor, blocked, "?")

            _ensure_instance(cursor, blocked)
            # The block level is part of the duplicate check (as in every
            # other branch) so a domain listed under two levels is recorded
            # for both.
            _ensure_block(cursor, blocker, blocked, reason, block_level)
    connection.commit()


def _update_friendica(cursor, connection, blocker: str) -> None:
    """Record the blocks published by a Friendica instance."""
    _update_listed(cursor, connection, blocker, get_friendica_blocks(blocker))


def _update_misskey(cursor, connection, blocker: str) -> None:
    """Record the blocks published by a Misskey instance."""
    _update_listed(cursor, connection, blocker, get_pisskey_blocks(blocker))


def _update_gotosocial(cursor, connection, blocker: str) -> None:
    """Record the suspended peers published by a GoToSocial instance."""
    peers = get(
        f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
    ).json()
    for peer in peers:
        blocked = peer["domain"].lower()

        if "*" in blocked:
            # GTS has no hashes for obscured domains, so guess the name.
            blocked = _resolve_obscured(cursor, blocked)

        _ensure_instance(cursor, blocked)
        _ensure_block(cursor, blocker, blocked, "", "reject")

        if "public_comment" in peer:
            # Only fill in the reason when no non-empty reason is stored yet.
            cursor.execute(
                "select * from blocks where blocker = ? and blocked = ? and reason != ? and block_level = ?",
                (blocker, blocked, "", "reject"),
            )
            if cursor.fetchone() is None:
                cursor.execute(
                    "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ?",
                    (peer["public_comment"], blocker, blocked, "reject"),
                )
    connection.commit()


# Software name (as stored in the instances table) -> updater function.
_UPDATERS = {
    "pleroma": _update_pleroma,
    "mastodon": _update_mastodon,
    "friendica": _update_friendica,
    "misskey": _update_misskey,
    "gotosocial": _update_gotosocial,
}

conn = sqlite3.connect("blocks.db")
try:
    c = conn.cursor()

    c.execute(
        "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
    )

    # fetchall() materializes the list first, so the cursor can be reused
    # for the lookups and inserts inside the loop.
    for blocker, software in c.fetchall():
        blocker = tidyup(blocker)
        update = _UPDATERS.get(software)
        if update is None:
            continue
        print(blocker)
        try:
            update(c, conn, blocker)
        except Exception as e:
            # One failing instance must not stop the crawl.
            print("error:", e, blocker)
finally:
    # Release the database even if the crawl is interrupted.
    conn.close()