]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Track fetch date in DB & correctly prepend newly added descriptions
[fba.git] / fetch_blocks.py
1 from requests import get
2 from requests import post
3 from hashlib import sha256
4 import sqlite3
5 from bs4 import BeautifulSoup
6 from json import dumps
7 import re
8 from time import time
9
# HTTP headers sent with every outgoing request; the user agent mimics a
# desktop Firefox browser.
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0"}
13
14
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the moderated-servers tables from a Mastodon ``/about/more`` page.

    The page is searched for ``<h3>`` headings naming a block category (in
    English or one of the known translations); the first table following such
    a heading is read row by row, skipping the header row.

    Args:
        domain: Host name of the Mastodon instance to query.

    Returns:
        A dict with the keys ``reject``, ``media_removal`` and
        ``followers_only``; each value is a list of dicts with the keys
        ``domain``, ``hash`` and ``reason`` (in that order). An empty dict is
        returned when the page cannot be fetched or parsed.
    """
    blocks = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Maps older/localised headings onto the canonical keys of ``blocks``.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable or unparsable page simply yields no blocks.
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue

        # Look at everything after the heading rather than only its siblings,
        # to account for instances that e.g. hide lists in a dropdown menu.
        table = header.find_next("table")
        if table is None:
            # Heading without a table: nothing to read for this category.
            continue

        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            blocks[header_text].append(
                {
                    "domain": span.text,
                    # The title attribute carries a 9-character prefix in
                    # front of the digest, which is cut off here.
                    "hash": span["title"][9:],
                    "reason": line.find_all("td")[1].text.strip(),
                }
            )

    return {
        "reject": blocks["Suspended servers"],
        "media_removal": blocks["Filtered media"],
        "followers_only": blocks["Limited servers"]
        + blocks["Silenced servers"],
    }
65
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the server blocklist from a Friendica ``/friendica`` page.

    Args:
        domain: Host name of the Friendica instance to query.

    Returns:
        ``{"reject": [...]}`` where each entry is a dict with the keys
        ``domain`` and ``reason`` (in that order). An empty dict is returned
        when the page cannot be fetched or does not contain a blocklist table.
    """
    try:
        doc = BeautifulSoup(
            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
            "html.parser",
        )
    except Exception:
        # Best effort: an unreachable page simply yields no blocks.
        return {}

    blocklist = doc.find(id="about_blocklist")
    table = blocklist.find("table") if blocklist is not None else None
    if table is None:
        # The instance does not publish a blocklist.
        return {}

    blocks = []
    # The first row is the table header.
    for line in table.find_all("tr")[1:]:
        cells = line.find_all("td")
        blocks.append(
            {
                "domain": cells[0].text.strip(),
                "reason": cells[1].text.strip(),
            }
        )

    return {
        "reject": blocks
    }
89
def _fetch_misskey_instances(domain: str, filter_key: str, flag_key: str, step: int = 99) -> list:
    """Page through ``/api/federation/instances`` and collect flagged hosts.

    Args:
        domain: Host name of the instance to query.
        filter_key: Request field set to ``True`` to filter the listing
            (``"suspended"`` or ``"blocked"``).
        flag_key: Field of each returned instance that must be truthy for the
            host to be collected (``"isSuspended"`` or ``"isBlocked"``).
        step: Page size requested from the API.

    Returns:
        A list of ``{"domain": ..., "reason": ""}`` dicts. Fetching is best
        effort: any error ends the pagination and whatever was collected up to
        that point is returned.
    """
    found = []
    offset = 0
    while True:
        payload = {"sort": "+caughtAt", "host": None, filter_key: True, "limit": step}
        if offset:
            # Continue exactly where the previous page ended; starting one row
            # earlier would fetch the last entry of every page twice.
            payload["offset"] = offset
        try:
            page = post(
                f"https://{domain}/api/federation/instances",
                data=dumps(payload),
                headers=headers,
                timeout=5,
            ).json()
            if not page:
                # An empty page marks the end of the listing.
                break
            for instance in page:
                # Re-check the flag on every entry, just in case the filter
                # was not honoured.
                if instance[flag_key]:
                    found.append(
                        {
                            "domain": instance["host"],
                            # The API offers no reason field.
                            "reason": "",
                        }
                    )
        except Exception:
            # Network error, non-JSON answer or unexpected payload shape.
            break
        offset += step
    return found


def get_pisskey_blocks(domain: str) -> dict:
    """Fetch the federation restrictions of a Misskey-compatible instance.

    The API cannot return the full list at once, so both listings are read
    page by page.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys ``reject`` (instances flagged ``isBlocked``) and
        ``followers_only`` (instances flagged ``isSuspended``); each value is
        a list of dicts with the keys ``domain`` and ``reason`` (in that
        order, the reason always being an empty string).
    """
    # "suspended" is follow-only in this software's terminology ...
    suspended = _fetch_misskey_instances(domain, "suspended", "isSuspended")
    # ... while "blocked" is the full suspension.
    blocked = _fetch_misskey_instances(domain, "blocked", "isBlocked")

    return {
        "reject": blocked,
        "followers_only": suspended,
    }
152
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``domain`` encoded as UTF-8."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
155
156
def get_type(domain: str) -> str:
    """Detect which server software ``domain`` runs.

    The nodeinfo document is probed at several well-known paths. Known forks
    are reported under the name of the software they derive from. If no
    nodeinfo path exists (HTTP 404), a successful answer from
    ``/api/v1/instance`` is taken as "mastodon".

    Args:
        domain: Host name of the instance to probe.

    Returns:
        The software name, or ``None`` when it cannot be determined (request
        failure, unexpected payload, or no probe succeeded).
    """
    # Forks that are handled like the software they are based on.
    aliases = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }

    try:
        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
        if res.status_code == 404:
            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
        # Some servers answer with an HTML page instead of JSON; try the
        # extension-less 2.1 path in that case. A missing content-type header
        # is treated as "not HTML" instead of aborting the detection.
        if res.ok and "text/html" in res.headers.get("content-type", ""):
            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)

        if res.ok:
            # Parse the payload once instead of once per comparison.
            name = res.json()["software"]["name"]
            return aliases.get(name, name)

        if res.status_code == 404:
            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
        if res.ok:
            return "mastodon"
    except Exception:
        # Best effort: any network or payload problem means "unknown".
        # ``Exception`` (not a bare ``except``) keeps Ctrl-C working.
        return None

    return None
181
def tidyup(domain: str) -> str:
    """Normalise a blocklist entry to a bare, lowercase host name.

    Surrounding whitespace, a trailing slash, a trailing port, a leading
    ``http:``/``https:`` scheme (with or without slashes) and a leading
    ``user@`` part are removed.

    Args:
        domain: Raw entry as published by an instance.

    Returns:
        The cleaned-up entry; may be an empty string.
    """
    # Blocklists come in variable case and sometimes with stray whitespace.
    domain = domain.strip().lower()
    # Drop the trailing slash first so that a port in front of it ends up at
    # the end of the string and is caught by the next substitution.
    domain = re.sub(r"/$", "", domain)
    # Some entries carry a port.
    domain = re.sub(r":\d+$", "", domain)
    # Some entries carry the scheme, sometimes even without slashes.
    domain = re.sub(r"^https?:(/*)", "", domain)
    # Some entries try to block individual users; keep only the host part.
    domain = re.sub(r"(.+)@", "", domain)
    return domain
194
def _guess_obscured_domain(cursor, blocked: str, wildcard: str) -> str:
    """Resolve an obscured domain against the already known instances.

    Every ``wildcard`` character is turned into the SQL ``LIKE`` single
    character placeholder and the first matching row (by rowid) is used.
    ``blocked`` is returned unchanged when nothing matches.
    """
    cursor.execute(
        "select domain from instances where domain like ? order by rowid limit 1",
        (blocked.replace(wildcard, "_"),),
    )
    row = cursor.fetchone()
    return blocked if row is None else row[0]


def _ensure_instance(cursor, domain: str) -> None:
    """Insert ``domain`` into the ``instances`` table unless it is already there."""
    cursor.execute("select domain from instances where domain = ?", (domain,))
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into instances select ?, ?, ?",
            (domain, get_hash(domain), get_type(domain)),
        )


def _record_block(cursor, blocker: str, blocked: str, block_level: str, reason: str = "") -> None:
    """Insert a block row, or refresh ``last_seen`` when the row already exists.

    A row is identified by blocker, blocked and block_level. New rows get the
    current time in both timestamp columns.
    """
    timestamp = int(time())
    cursor.execute(
        "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
        (blocker, blocked, block_level),
    )
    if cursor.fetchone() is None:
        cursor.execute(
            "insert into blocks select ?, ?, ?, ?, ?, ?",
            (blocker, blocked, reason, block_level, timestamp, timestamp),
        )
    else:
        cursor.execute(
            "update blocks set last_seen = ? where blocker = ? and blocked = ? and block_level = ?",
            (timestamp, blocker, blocked, block_level),
        )


def _fill_reason(cursor, blocker: str, blocked: str, block_level: str, reason: str) -> None:
    """Store ``reason`` on a block row that does not have one yet."""
    cursor.execute(
        "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ? and reason = ''",
        (reason, blocker, blocked, block_level),
    )


def _update_pleroma(conn, cursor, blocker: str) -> None:
    """Record the MRF blocks (and their reasons) published in Pleroma's nodeinfo."""
    federation = get(
        f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
    ).json()["metadata"]["federation"]

    # Blocks
    if "mrf_simple" in federation:
        levels = {
            **federation["mrf_simple"],
            # Not every instance publishes quarantined instances; a missing
            # key must not abort the whole blocker.
            "quarantined_instances": federation.get("quarantined_instances", []),
        }
        for block_level, blocks in levels.items():
            for blocked in blocks:
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Domains may be obscured without a hash being provided,
                    # so the real name has to be guessed.
                    blocked = _guess_obscured_domain(cursor, blocked, "*")
                _ensure_instance(cursor, blocked)
                _record_block(cursor, blocker, blocked, block_level)
    conn.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        infos = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }
        for block_level, info in infos.items():
            for blocked, reason in info.items():
                blocked = tidyup(blocked)
                if blocked == "":
                    continue
                if blocked.count("*") > 1:
                    # Same domain guess as above, but for the reasons field.
                    blocked = _guess_obscured_domain(cursor, blocked, "*")
                _fill_reason(cursor, blocker, blocked, block_level, reason["reason"])
    conn.commit()


def _fetch_mastodon_block_lists(blocker: str) -> dict:
    """Return Mastodon blocks grouped by block level.

    The JSON endpoint of newer Mastodon versions is tried first; on any
    failure the HTML scraper ``get_mastodon_blocks`` is used instead.
    """
    severities = {
        "suspend": "reject",
        "silence": "followers_only",
        "reject_media": "media_removal",
        "reject_reports": "report_removal",
    }
    try:
        block_lists = {
            "reject": [],
            "media_removal": [],
            "followers_only": [],
            "report_removal": [],
        }
        listing = get(
            f"https://{blocker}/api/v1/instance/domain_blocks", headers=headers, timeout=5
        ).json()
        for block in listing:
            entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
            level = severities.get(block['severity'])
            if level is not None:
                block_lists[level].append(entry)
        return block_lists
    except Exception:
        return get_mastodon_blocks(blocker)


def _update_mastodon(conn, cursor, blocker: str) -> None:
    """Record the blocks published by a Mastodon instance."""
    for block_level, blocks in _fetch_mastodon_block_lists(blocker).items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            blocked_hash = instance["hash"]
            # The API may deliver a null comment; store it as the empty string
            # so that a later "reason = ''" update can still fill it in.
            reason = instance["reason"] or ""

            if blocked.count("*") <= 1:
                cursor.execute(
                    "select hash from instances where hash = ?", (blocked_hash,)
                )
                if cursor.fetchone() is None:
                    cursor.execute(
                        "insert into instances select ?, ?, ?",
                        (blocked, get_hash(blocked), get_type(blocked)),
                    )
            else:
                # Obscured name: look the real one up through the hash.
                cursor.execute(
                    "select domain from instances where hash = ?", (blocked_hash,)
                )
                row = cursor.fetchone()
                if row is not None:
                    blocked = row[0]

            # A name that is still obscured is stored under its hash.
            target = blocked if blocked.count("*") <= 1 else blocked_hash
            _record_block(cursor, blocker, target, block_level, reason)
            if reason != "":
                _fill_reason(cursor, blocker, target, block_level, reason)
    conn.commit()


def _update_from_block_lists(conn, cursor, blocker: str, fetch_blocks) -> None:
    """Record blocks for software whose fetcher yields ``domain``/``reason`` entries.

    ``fetch_blocks`` is called with the blocker and must return a dict mapping
    block levels to lists of such entries.
    """
    for block_level, blocks in fetch_blocks(blocker).items():
        for instance in blocks:
            blocked = tidyup(instance["domain"])
            reason = instance["reason"]

            if "*" in blocked:
                # Some servers obscure domains without providing a hash.
                blocked = _guess_obscured_domain(cursor, blocked, "*")
            if "?" in blocked:
                # Some obscure them with question marks instead.
                blocked = _guess_obscured_domain(cursor, blocked, "?")

            _ensure_instance(cursor, blocked)
            # The row is identified by its block level (not by its reason), so
            # a changed reason does not create a duplicate and a domain listed
            # under two levels gets one row per level.
            _record_block(cursor, blocker, blocked, block_level, reason)
            if reason != "":
                _fill_reason(cursor, blocker, blocked, block_level, reason)
    conn.commit()


def _update_gotosocial(conn, cursor, blocker: str) -> None:
    """Record the suspended peers published by a GoToSocial instance."""
    federation = get(
        f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
    ).json()
    for peer in federation:
        blocked = peer["domain"].lower()

        if "*" in blocked:
            # No hashes are provided for obscured domains, so guess the name.
            blocked = _guess_obscured_domain(cursor, blocked, "*")

        _ensure_instance(cursor, blocked)
        _record_block(cursor, blocker, blocked, "reject")

        reason = peer.get("public_comment")
        if reason:
            _fill_reason(cursor, blocker, blocked, "reject", reason)
    conn.commit()


# Maps the software name stored in the database to the routine updating its blocks.
_HANDLERS = {
    "pleroma": _update_pleroma,
    "mastodon": _update_mastodon,
    "friendica": lambda conn, cursor, blocker: _update_from_block_lists(
        conn, cursor, blocker, get_friendica_blocks
    ),
    "misskey": lambda conn, cursor, blocker: _update_from_block_lists(
        conn, cursor, blocker, get_pisskey_blocks
    ),
    "gotosocial": _update_gotosocial,
}

conn = sqlite3.connect("blocks.db")
c = conn.cursor()

try:
    c.execute(
        # NOTE(review): the query is pinned to a single blocker, which looks
        # like a debugging leftover; the commented-out statement below selects
        # every supported instance instead -- confirm which one is intended.
        # "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
        "select domain, software from instances where domain = 'glaceon.social'"
    )

    for blocker, software in c.fetchall():
        blocker = tidyup(blocker)
        handler = _HANDLERS.get(software)
        if handler is None:
            # Software without a known way of publishing its blocks.
            continue
        print(blocker)
        try:
            handler(conn, c, blocker)
        except Exception as e:
            # One failing blocker must not stop the whole run.
            print("error:", e, blocker)
finally:
    # Close the database even when the run is interrupted.
    conn.close()