]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
+1 filter for entries
[fba.git] / fetch_blocks.py
1 from requests import get
2 from requests import post
3 from hashlib import sha256
4 import sqlite3
5 from bs4 import BeautifulSoup
6 from json import dumps
7 import re
8 from time import time
9
# Request headers sent with every outgoing HTTP call in this script; the
# user-agent string is the one a desktop Firefox 102 would send.
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0"}
13
14
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the block list from a Mastodon instance's ``/about/more`` page.

    Every ``<h3>`` heading whose text (after translation to the English
    heading) names a known block category is followed to the next
    ``<table>``; each row after the header row yields one entry.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``"reject"``, ``"media_removal"`` and
        ``"followers_only"``, each a list of
        ``{"domain": ..., "hash": ..., "reason": ...}`` dicts, or an empty
        dict when the page could not be fetched or parsed.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Localised / older headings mapped onto the keys of ``blocks``.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "שרתים מושעים": "Suspended servers",
        "מדיה מסוננת": "Filtered media",
        "שרתים מוגבלים": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or unparsable page means "no data".
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue

        # Search everything after the heading rather than only its siblings,
        # to account for instances that e.g. hide lists in a dropdown menu.
        table = header.find_next("table")
        if table is None:
            continue

        # The first row is skipped (treated as the table header).
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")
            # Skip rows that do not have the expected shape instead of
            # aborting the whole instance with an exception.
            if span is None or not span.has_attr("title") or len(cells) < 2:
                continue
            blocks[header_text].append(
                {
                    "domain": span.text,
                    # The first 9 characters of the title are dropped; the
                    # remainder is used as the hash
                    # (presumably a "SHA-256: " prefix - TODO confirm).
                    "hash": span["title"][9:],
                    "reason": cells[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "followers_only": blocks["Limited servers"]
        + blocks["Silenced servers"],
    }
68
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the block list from a Friendica instance's ``/friendica`` page.

    The list is read from the first ``<table>`` inside the element with id
    ``about_blocklist``; the first row is skipped as the table header.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{"reject": [...]}`` where each item is
        ``{"domain": ..., "reason": ...}``. The list is empty when the page
        has no block-list section. An empty dict is returned when the page
        could not be fetched or parsed.
    """
    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or unparsable page means "no data".
        return {}

    blocks = []

    # Instances that publish no block list have no such element at all.
    blocklist = doc.find(id="about_blocklist")
    table = blocklist.find("table") if blocklist is not None else None
    if table is not None:
        for line in table.find_all("tr")[1:]:
            cells = line.find_all("td")
            if len(cells) < 2:
                # Not a domain/reason row; ignore it.
                continue
            blocks.append(
                {
                    "domain": cells[0].text.strip(),
                    "reason": cells[1].text.strip(),
                }
            )

    return {
        "reject": blocks
    }
92
def _fetch_misskey_instances(domain: str, filter_key: str, flag_key: str) -> list:
    """Page through ``/api/federation/instances`` for one kind of block.

    Args:
        domain: Host name of the Misskey instance to query.
        filter_key: Request field set to ``True`` to select the entries
            (``"suspended"`` or ``"blocked"``).
        flag_key: Field of each returned entry that must be truthy for the
            entry to be kept (``"isSuspended"`` or ``"isBlocked"``).

    Returns:
        A list of ``{"domain": host, "reason": ""}`` dicts. The API response
        is not read for a reason, so it is always empty. Whatever was
        collected before a failed request is still returned.
    """
    found = []
    seen = set()
    step = 99
    offset = 0

    while True:
        # The list cannot be requested in one go, so it is fetched page by
        # page. Request payloads are identical to the ones sent before.
        query = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
        if offset:
            # Pages overlap by one entry; duplicates are dropped below.
            query["offset"] = offset - 1

        progressed = False
        try:
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(query),
                headers=headers,
                timeout=5,
            ).json()
            if not page:
                break
            for instance in page:
                host = instance["host"]
                if host in seen:
                    continue
                seen.add(host)
                progressed = True
                # Double-check the flag even though the request filtered on it.
                if instance[flag_key]:
                    found.append({"domain": host, "reason": ""})
        except Exception:
            # Network error or unexpected payload: keep what we have so far.
            break

        if not progressed:
            # Nothing new on this page: either the end of the list or a
            # server that ignores "offset". Stop instead of looping forever.
            break
        offset += step

    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Collect the block lists of a Misskey-family instance.

    Args:
        domain: Host name of the instance to query.

    Returns:
        ``{"reject": [...], "followers_only": [...]}``. "Blocked" instances
        are reported as ``reject`` and "suspended" ones as
        ``followers_only``. Both keys are always present; the lists are
        empty when nothing could be fetched.
    """
    # Same request order as before: suspended first, then blocked.
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")

    return {
        "reject": blocked,
        "followers_only": suspended
    }
155
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``domain`` encoded as UTF-8."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
158
159
def get_type(domain: str) -> "str | None":
    """Detect which server software ``domain`` runs.

    Several nodeinfo URLs are tried in turn. Known forks are reported under
    the name of the software they derive from. When nodeinfo is missing
    (HTTP 404) but ``/api/v1/instance`` answers successfully, the instance
    is assumed to be Mastodon.

    Args:
        domain: Host name of the instance to probe.

    Returns:
        The software name, or ``None`` when it could not be determined or
        any request/parsing step failed.
    """
    # Fork name -> name of the software it is treated as.
    aliases = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }

    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        # A successful HTML answer is not a nodeinfo document; retry on the
        # extension-less 2.1 URL. A missing content-type header is treated
        # as "not HTML" instead of raising.
        if res.ok and "text/html" in res.headers.get("content-type", ""):
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # Parse the body once instead of once per comparison.
            name = res.json()["software"]["name"]
            return aliases.get(name, name)

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
            if res.ok:
                return "mastodon"
        return None
    except Exception:
        # Best effort: network errors and malformed documents mean "unknown".
        return None
184
def tidyup(domain: str) -> str:
    """Normalise a block-list entry to a bare, lower-case host name.

    Block lists contain entries in many shapes: mixed case, with a URL
    scheme (sometimes without slashes), with a port, with a trailing slash,
    or written as ``@host`` / ``user@host``. All of these are reduced to the
    plain host part. Wildcard characters are left untouched.

    Args:
        domain: Raw entry as published by the blocking instance.

    Returns:
        The cleaned-up host name (possibly an empty string).
    """
    # Surrounding whitespace and letter case carry no meaning.
    domain = domain.strip().lower()
    # Drop the scheme first, even when written without slashes ("https:host").
    domain = re.sub(r"^https?:/*", "", domain)
    # Then the trailing slash ...
    domain = re.sub(r"/$", "", domain)
    # ... so that a port in "https://host:8080/" sits at the end and is removed.
    domain = re.sub(r":\d+$", "", domain)
    # A leading "@" as in "@host".
    domain = re.sub(r"^@", "", domain)
    # Entries that target a single user ("user@host"): keep only the host.
    domain = re.sub(r".+@", "", domain)
    return domain
199
def _resolve_obscured(cursor, domain: str, wildcard: str = "*") -> str:
    """Guess the real name behind a domain obscured with ``wildcard``.

    Every wildcard character becomes the single-character SQL ``LIKE``
    placeholder ``_``; the matching ``instances`` row with the lowest rowid
    wins. ``domain`` is returned unchanged when nothing matches.
    """
    cursor.execute(
        "select domain from instances where domain like ? order by rowid limit 1",
        (domain.replace(wildcard, "_"),),
    )
    row = cursor.fetchone()
    return domain if row is None else row[0]


def _ensure_instance(cursor, domain: str) -> None:
    """Insert ``domain`` into ``instances`` unless a row for it exists."""
    cursor.execute("select domain from instances where domain = ?", (domain,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (domain, get_hash(domain), get_type(domain)),
        )


def _record_block(cursor, blocker: str, blocked: str, block_level: str, reason="") -> None:
    """Insert a block row or refresh ``last_seen`` of the existing one.

    A row is identified by blocker, blocked and block level. A non-empty
    ``reason`` only fills in a reason that is still empty. ``None`` is
    stored as ``''`` so that a later run can still fill it in.
    """
    reason = reason or ""
    timestamp = int(time())
    cursor.execute(
        "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
        (blocker, blocked, block_level),
    )
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into blocks select ?, ?, ?, ?, ?, ?",
            (blocker, blocked, reason, block_level, timestamp, timestamp),
        )
    else:
        cursor.execute(
            "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
            (timestamp, blocker, blocked, block_level),
        )
    if reason != "":
        cursor.execute(
            "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
            (reason, blocker, blocked, block_level),
        )


def _process_pleroma(cursor, connection, blocker: str) -> None:
    """Store the MRF block lists (and their reasons) published via nodeinfo."""
    federation = get(
        f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
    ).json()["metadata"]["federation"]

    # Blocks
    if "mrf_simple" in federation:
        levels = {
            **federation["mrf_simple"],
            # Not every instance publishes this key.
            "quarantined_instances": federation.get("quarantined_instances", []),
        }
        for block_level, blocks in levels.items():
            for blocked in blocks:
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Obscured domain without a hash: guess from known names.
                    blocked = _resolve_obscured(cursor, blocked)
                _ensure_instance(cursor, blocked)
                _record_block(cursor, blocker, blocked, block_level)
    connection.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        reasons = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }
        for block_level, info in reasons.items():
            for blocked, reason in info.items():
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Same domain guess as above, but for the reasons.
                    blocked = _resolve_obscured(cursor, blocked)
                cursor.execute(
                    "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
                    (reason["reason"], blocker, blocked, block_level),
                )
    connection.commit()


def _fetch_mastodon_blocklist(blocker: str) -> dict:
    """Return the block list from the JSON API, falling back to HTML scraping."""
    severities = {
        "suspend": "reject",
        "silence": "followers_only",
        "reject_media": "media_removal",
        "reject_reports": "report_removal",
    }
    try:
        # JSON endpoint offered by newer Mastodon versions.
        blocklist = {
            "reject": [],
            "media_removal": [],
            "followers_only": [],
            "report_removal": []
        }
        entries = get(
            f"https://{blocker}/api/v1/instance/domain_blocks", headers=headers, timeout=5
        ).json()
        for entry in entries:
            record = {'domain': entry['domain'], 'hash': entry['digest'], 'reason': entry['comment']}
            block_level = severities.get(entry['severity'])
            if block_level is not None:
                blocklist[block_level].append(record)
        return blocklist
    except Exception:
        return get_mastodon_blocks(blocker)


def _process_mastodon(cursor, connection, blocker: str) -> None:
    """Store the block list of a Mastodon instance."""
    for block_level, blocks in _fetch_mastodon_blocklist(blocker).items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            blocked_hash = instance["hash"]
            reason = instance["reason"]

            if blocked.count("*") <= 1:
                cursor.execute(
                    "select hash from instances where hash = ?", (blocked_hash,)
                )
                if cursor.fetchone() is None:
                    cursor.execute(
                        "insert into instances select ?, ?, ?",
                        (blocked, get_hash(blocked), get_type(blocked)),
                    )
            else:
                # Obscured name: look the real one up through its hash.
                cursor.execute(
                    "select domain from instances where hash = ?", (blocked_hash,)
                )
                row = cursor.fetchone()
                if row is not None:
                    blocked = row[0]

            # Names that are still obscured are stored by their hash instead.
            key = blocked if blocked.count("*") <= 1 else blocked_hash
            _record_block(cursor, blocker, key, block_level, reason)
    connection.commit()


def _store_plain_blocklist(cursor, connection, blocker: str, blocklist: dict) -> None:
    """Store a ``{level: [{"domain", "reason"}, ...]}`` block list."""
    for block_level, blocks in blocklist.items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            reason = instance["reason"]

            if blocked.count("*") > 0:
                # Some Friendica servers obscure domains without a hash.
                blocked = _resolve_obscured(cursor, blocked, "*")
            if blocked.count("?") > 0:
                # Others obscure them with question marks.
                blocked = _resolve_obscured(cursor, blocked, "?")

            _ensure_instance(cursor, blocked)
            # Rows are matched on block level, like for every other
            # software, so a host listed under two levels gets two rows.
            _record_block(cursor, blocker, blocked, block_level, reason)
    connection.commit()


def _process_friendica(cursor, connection, blocker: str) -> None:
    """Store the block list of a Friendica instance."""
    _store_plain_blocklist(cursor, connection, blocker, get_friendica_blocks(blocker))


def _process_misskey(cursor, connection, blocker: str) -> None:
    """Store the block lists of a Misskey-family instance."""
    _store_plain_blocklist(cursor, connection, blocker, get_pisskey_blocks(blocker))


def _process_gotosocial(cursor, connection, blocker: str) -> None:
    """Store the suspended peers of a GoToSocial instance as ``reject`` blocks."""
    peers = get(
        f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
    ).json()
    for peer in peers:
        blocked = peer["domain"].lower()
        if blocked.count("*") > 0:
            # GTS has no hashes for obscured domains, so the name is guessed.
            blocked = _resolve_obscured(cursor, blocked)
        _ensure_instance(cursor, blocked)
        _record_block(cursor, blocker, blocked, "reject", peer.get("public_comment"))
    connection.commit()


# Software name (as stored in ``instances.software``) -> block-list importer.
_PROCESSORS = {
    "pleroma": _process_pleroma,
    "mastodon": _process_mastodon,
    "friendica": _process_friendica,
    "misskey": _process_misskey,
    "gotosocial": _process_gotosocial,
}

conn = sqlite3.connect("blocks.db")
c = conn.cursor()

c.execute(
    "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
)

for blocker, software in c.fetchall():
    process = _PROCESSORS.get(software)
    if process is None:
        # Cannot happen with the query above; kept as a safeguard.
        continue
    blocker = tidyup(blocker)
    print(blocker)
    try:
        process(c, conn, blocker)
    except Exception as e:
        # One failing instance must not stop the whole run.
        print("error:", e, blocker)
conn.close()