]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
1 import reqto
2 import time
3 import bs4
4 import fba
5 import itertools
6 import re
7
# Select every stored instance whose software is one of the five platforms
# this script has a fetch branch for; the loop that follows consumes the
# (domain, software) rows via fba.c.fetchall().
_SUPPORTED_INSTANCES_SQL = "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
fba.c.execute(_SUPPORTED_INSTANCES_SQL)
11
def _resolve_obscured_domain(blocked, wildcard="*"):
    """Return the first known domain matching an obscured name, else `blocked`.

    Every `wildcard` character in `blocked` is replaced by the SQL LIKE
    single-character placeholder "_" and the instances table is searched;
    the matching row with the lowest rowid wins. When nothing matches, the
    obscured name is returned unchanged so the caller can still store it.
    """
    fba.c.execute(
        "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace(wildcard, "_"),)
    )
    searchres = fba.c.fetchone()
    # NOISY-DEBUG: print("DEBUG: searchres[]:", type(searchres))
    return blocked if searchres is None else searchres[0]


def _ensure_instance_known(domain):
    """Call fba.add_instance(domain) unless the instances table already has it."""
    # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", domain)
    fba.c.execute(
        "SELECT domain FROM instances WHERE domain = ?", (domain,)
    )

    if fba.c.fetchone() is None:
        # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", domain)
        fba.add_instance(domain)


def _record_block(blocker, blocked, reason, block_level):
    """Insert a (blocker, blocked, block_level) block or refresh its last_seen.

    Returns True when no matching row existed and fba.block_instance() was
    called, False when the row was already there and only
    fba.update_last_seen() was called.
    """
    timestamp = int(time.time())
    fba.c.execute(
        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
        (
            blocker,
            blocked,
            block_level
        ),
    )

    if fba.c.fetchone() is None:
        # NOISY-DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
        fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
        return True

    # NOISY-DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level)
    fba.update_last_seen(timestamp, blocker, blocked, block_level)
    return False


# Walk over every selected instance, fetch its published block list with the
# method matching its software, and mirror the result into the blocks table.
# Newly seen "reject" blocks are collected in blockdict and handed to the bot
# at the end of each iteration. Every branch catches Exception so that one
# unreachable or malformed instance does not abort the whole run.
for blocker, software in fba.c.fetchall():
    # NOISY-DEBUG: print("DEBUG: BEFORE-blocker,software:", blocker, software)
    blockdict = []
    blocker = fba.tidyup(blocker)
    # NOISY-DEBUG: print("DEBUG: AFTER-blocker,software:", blocker, software)

    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue

    if software == "pleroma":
        print("INFO: blocker:", blocker)
        try:
            # Blocks
            federation = reqto.get(
                f"https://{blocker}/nodeinfo/2.1.json", headers=fba.headers, timeout=5
            ).json()["metadata"]["federation"]
            if "mrf_simple" in federation:
                # quarantined_instances is treated as one more block level.
                # It is optional here (like quarantined_instances_info below),
                # so a missing or null key no longer throws away the whole
                # mrf_simple list through the except handler.
                for block_level, blocks in (
                    {**federation["mrf_simple"],
                     **{"quarantined_instances": federation.get("quarantined_instances") or []}}
                ).items():
                    # NOISY-DEBUG: print("DEBUG: block_level, blocks.items():", block_level, len(blocks.items()))
                    block_level = fba.tidyup(block_level)
                    # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level)

                    if block_level == "":
                        print("WARNING: block_level is now empty!")
                        continue

                    for blocked in blocks:
                        # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                        blocked = fba.tidyup(blocked)
                        # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)

                        if blocked == "":
                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                            continue

                        if blocked.count("*") > 1:
                            # -ACK!-oma also started obscuring domains without hash
                            blocked = _resolve_obscured_domain(blocked)
                            # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked)

                        _ensure_instance_known(blocked)

                        # The block list itself carries no reasons; they are
                        # filled in from mrf_simple_info further down.
                        if _record_block(blocker, blocked, "unknown", block_level) and block_level == "reject":
                            # NOISY-DEBUG: print("DEBUG: Adding to blockdict:", blocked)
                            blockdict.append(
                                {
                                    "blocked": blocked,
                                    "reason": None
                                })

            fba.conn.commit()

            # Reasons
            if "mrf_simple_info" in federation:
                # NOISY-DEBUG: print("DEBUG: Found mrf_simple_info:", blocker)
                for block_level, info in (
                    {**federation["mrf_simple_info"],
                     **(federation["quarantined_instances_info"]
                        if "quarantined_instances_info" in federation
                        else {})}
                ).items():
                    # NOISY-DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
                    block_level = fba.tidyup(block_level)
                    # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level)

                    if block_level == "":
                        print("WARNING: block_level is now empty!")
                        continue

                    for blocked, reason in info.items():
                        # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                        blocked = fba.tidyup(blocked)
                        # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)

                        if blocked == "":
                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                            continue
                        elif blocked.count("*") > 1:
                            # same domain guess as above, but for reasons field
                            blocked = _resolve_obscured_domain(blocked)

                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
                        fba.update_block_reason(reason["reason"], blocker, blocked, block_level)

                        # Back-fill the reason on entries queued for the bot.
                        for entry in blockdict:
                            if entry["blocked"] == blocked:
                                # NOISY-DEBUG: print("DEBUG: Updating entry reason:", blocked)
                                entry["reason"] = reason["reason"]

            fba.conn.commit()
        except Exception as e:
            print("error:", e, blocker, software)
    elif software == "mastodon":
        print("INFO: blocker:", blocker)
        try:
            # json endpoint for newer mastodongs
            try:
                blocklists = {
                    "reject": [],
                    "media_removal": [],
                    "followers_only": [],
                    "report_removal": []
                }

                # handling CSRF, I've saw at least one server requiring it to access the endpoint
                # NOISY-DEBUG: print("DEBUG: Fetching meta:", blocker)
                meta = bs4.BeautifulSoup(
                    reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=5).text,
                    "html.parser",
                )
                try:
                    csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
                    # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf)
                    reqheaders = {**fba.headers, **{"x-csrf-token": csrf}}
                except Exception:
                    # Missing tag or attribute: proceed without the token.
                    # Exception (not a bare except) so Ctrl-C still works.
                    # NOISY-DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker)
                    reqheaders = fba.headers

                # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker)
                blocks = reqto.get(
                    f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
                ).json()

                # NOISY-DEBUG: print("DEBUG: blocks():", len(blocks))
                for block in blocks:
                    # Key order matters: the consumer below unpacks .values()
                    # positionally as (domain, hash, reason).
                    entry = {
                        'domain': block['domain'],
                        'hash': block['digest'],
                        'reason': block['comment']
                    }

                    # NOISY-DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
                    if block['severity'] == 'suspend':
                        blocklists['reject'].append(entry)
                    elif block['severity'] == 'silence':
                        blocklists['followers_only'].append(entry)
                    elif block['severity'] == 'reject_media':
                        blocklists['media_removal'].append(entry)
                    elif block['severity'] == 'reject_reports':
                        blocklists['report_removal'].append(entry)
                    else:
                        print("WARNING: Unknown severity:", block['severity'], block['domain'])
            except Exception:
                # Any failure of the API route falls back to the helper.
                # NOISY-DEBUG: print("DEBUG: Failed, Trying mastodon-specific fetches:", blocker)
                blocklists = fba.get_mastodon_blocks(blocker)

            # NOISY-DEBUG: print("DEBUG: blocklists.items():", blocker, len(blocklists.items()))
            for block_level, blocks in blocklists.items():
                # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
                block_level = fba.tidyup(block_level)
                # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for instance in blocks:
                    blocked, blocked_hash, reason = instance.values()
                    # NOISY-DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)
                    blocked = fba.tidyup(blocked)
                    # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blocked.count("*") < 1:
                        # No obsfucation for this instance
                        _ensure_instance_known(blocked)
                    else:
                        # Doing the hash search for instance names as well to tidy up DB
                        fba.c.execute(
                            "SELECT domain FROM instances WHERE hash = ? LIMIT 1", (blocked_hash,)
                        )
                        searchres = fba.c.fetchone()

                        if searchres is not None:
                            # NOISY-DEBUG: print("DEBUG: Updating domain: ", searchres[0])
                            blocked = searchres[0]

                    # A name that is still obscured (more than one "*") is
                    # stored under its hash instead of the unusable name.
                    stored = blocked if blocked.count("*") <= 1 else blocked_hash

                    if _record_block(blocker, stored, reason, block_level) and block_level == "reject":
                        blockdict.append(
                            {
                                "blocked": blocked,
                                "reason": reason
                            })

                    if reason != '':
                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
                        fba.update_block_reason(reason, blocker, stored, block_level)

            fba.conn.commit()
        except Exception as e:
            print("error:", e, blocker, software)
    elif software == "friendica" or software == "misskey":
        print("INFO: blocker:", blocker)
        try:
            if software == "friendica":
                blocklists = fba.get_friendica_blocks(blocker)
            else:
                blocklists = fba.get_misskey_blocks(blocker)

            for block_level, blocks in blocklists.items():
                # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
                block_level = fba.tidyup(block_level)
                # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for instance in blocks:
                    blocked, reason = instance.values()
                    # NOISY-DEBUG: print("DEBUG: BEFORE-blocked:", blocked)
                    blocked = fba.tidyup(blocked)
                    # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    if blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without hash
                        blocked = _resolve_obscured_domain(blocked)

                    if blocked.count("?") > 0:
                        # Some obscure them with question marks, not sure if that's dependent on version or not
                        blocked = _resolve_obscured_domain(blocked, "?")

                    # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
                    _ensure_instance_known(blocked)

                    # The existence check now includes block_level, as in the
                    # other branches. Previously a domain listed at a second
                    # level was never inserted, and update_last_seen() was
                    # aimed at a row that did not exist.
                    if _record_block(blocker, blocked, reason, block_level) and block_level == "reject":
                        blockdict.append(
                            {
                                "blocked": blocked,
                                "reason": reason
                            })

                    if reason != '':
                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
                        fba.update_block_reason(reason, blocker, blocked, block_level)

            fba.conn.commit()
        except Exception as e:
            print("error:", e, blocker, software)
    elif software == "gotosocial":
        print("INFO: blocker:", blocker)
        try:
            # Blocks
            federation = reqto.get(
                f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=5
            ).json()

            if federation is None:
                print("WARNING: No valid response:", blocker)
            else:
                for peer in federation:
                    # NOISY-DEBUG: print("DEBUG: peer(),[]:", len(peer), type(peer))
                    # Iterating a JSON object yields its keys, so a string
                    # "error" here means the response was an error object
                    # rather than a list of peers.
                    if isinstance(peer, str) and peer == "error":
                        print("WARNING: Cannot continue, maybe authentication required?", blocker)
                        break

                    blocked = peer["domain"].lower()
                    # NOISY-DEBUG: print("DEBUG: BEFORE-blocked:", blocked)
                    blocked = fba.tidyup(blocked)
                    # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    if blocked.count("*") > 0:
                        # GTS does not have hashes for obscured domains, so we have to guess it
                        blocked = _resolve_obscured_domain(blocked)

                    _ensure_instance_known(blocked)

                    # Everything returned by filter=suspended is stored as "reject".
                    if _record_block(blocker, blocked, "unknown", "reject"):
                        blockdict.append(
                            {
                                "blocked": blocked,
                                "reason": None
                            })

                    if "public_comment" in peer:
                        reason = peer["public_comment"]
                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
                        fba.update_block_reason(reason, blocker, blocked, "reject")

                        for entry in blockdict:
                            if entry["blocked"] == blocked:
                                entry["reason"] = reason

                fba.conn.commit()
        except Exception as e:
            print("error:", e, blocker, software)
    else:
        print("WARNING: Unknown software:", software)

    if fba.config["bot_enabled"] and blockdict:
        # The bare name send_bot_post is neither defined nor imported in this
        # script, so the old call raised NameError as soon as there was
        # something to post.
        # NOTE(review): assumes the helper lives in the fba module like every
        # other helper used here - confirm.
        fba.send_bot_post(blocker, blockdict)
424
# The loop above has processed every selected instance; close the connection
# exposed as fba.conn before the script exits.
fba.conn.close()