]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
import itertools
import re
import sys
import time

import bs4
import reqto

import fba
7
# Pick every instance running a supported software whose block list was
# never fetched, or was fetched before the configured recheck cutoff.
recheck_cutoff = time.time() - fba.config["recheck_block"]
select_sql = "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC"
fba.cursor.execute(select_sql, [recheck_cutoff])

# The rows are materialised up front because the loop body re-uses
# fba.cursor for its own queries.
pending_instances = fba.cursor.fetchall()

for blocker, software in pending_instances:
    # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software:", blocker, software)
    # Blocks found during this iteration that are handed to the bot-post
    # step at the end of the loop body.
    blockdict = list()
    blocker = fba.tidyup(blocker)
    # NOISY-DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue

    # Stamp the instance before fetching, so it is stamped even when the
    # fetch below fails.
    fba.update_last_blocked(blocker)
23
24     if software == "pleroma":
25         print("INFO: blocker:", blocker)
26         try:
27             # Blocks
28             json = fba.fetch_nodeinfo(blocker)
29             if json is None:
30                 print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
31                 continue
32
33             print("DEBUG: Updating nodeinfo:", blocker)
34             fba.update_last_nodeinfo(blocker)
35
36             federation = json["metadata"]["federation"]
37
38             if "enabled" in federation:
39                 # NOISY-DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker)
40                 continue
41
42             if "mrf_simple" in federation:
43                 for block_level, blocks in (
44                     {**federation["mrf_simple"],
45                     **{"quarantined_instances": federation["quarantined_instances"]}}
46                 ).items():
47                     # NOISY-DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks))
48                     block_level = fba.tidyup(block_level)
49                     # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level)
50
51                     if block_level == "":
52                         print("WARNING: block_level is now empty!")
53                         continue
54
55                     for blocked in blocks:
56                         # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
57                         blocked = fba.tidyup(blocked)
58                         # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
59
60                         if blocked == "":
61                             print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
62                             continue
63
64                         if blocked.count("*") > 1:
65                             # -ACK!-oma also started obscuring domains without hash
66                             fba.cursor.execute(
67                                 "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
68                             )
69                             searchres = fba.cursor.fetchone()
70                             # NOISY-DEBUG: print("DEBUG: searchres[]:", type(searchres))
71                             if searchres != None:
72                                 blocked = searchres[0]
73                                 # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked)
74
75                         # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
76                         fba.cursor.execute(
77                             "SELECT domain FROM instances WHERE domain = ? LIMIT 1", [blocked]
78                         )
79
80                         if fba.cursor.fetchone() == None:
81                             # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker)
82                             fba.add_instance(blocked, blocker, argv[0])
83
84                         fba.cursor.execute(
85                             "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
86                             (
87                                blocker,
88                                blocked,
89                                block_level
90                            ),
91                         )
92
93                         if fba.cursor.fetchone() == None:
94                             # NOISY-DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
95                             fba.block_instance(blocker, blocked, "unknown", block_level)
96
97                             if block_level == "reject":
98                                 # NOISY-DEBUG: print("DEBUG: Adding to blockdict:", blocked)
99                                 blockdict.append(
100                                     {
101                                         "blocked": blocked,
102                                         "reason": None
103                                     })
104                         else:
105                             # NOISY-DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level)
106                             fba.update_last_seen(blocker, blocked, block_level)
107
108             fba.connection.commit()
109
110             # Reasons
111             if "mrf_simple_info" in federation:
112                 # NOISY-DEBUG: print("DEBUG: Found mrf_simple_info:", blocker)
113                 for block_level, info in (
114                     {**federation["mrf_simple_info"],
115                     **(federation["quarantined_instances_info"]
116                     if "quarantined_instances_info" in federation
117                     else {})}
118                 ).items():
119                     # NOISY-DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
120                     block_level = fba.tidyup(block_level)
121                     # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level)
122
123                     if block_level == "":
124                         print("WARNING: block_level is now empty!")
125                         continue
126
127                     for blocked, reason in info.items():
128                         # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
129                         blocked = fba.tidyup(blocked)
130                         # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
131
132                         if blocked == "":
133                             print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
134                             continue
135                         elif blocked.count("*") > 1:
136                             # same domain guess as above, but for reasons field
137                             fba.cursor.execute(
138                                 "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
139                             )
140                             searchres = fba.cursor.fetchone()
141
142                             if searchres != None:
143                                 blocked = searchres[0]
144
145                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
146                         fba.update_block_reason(reason["reason"], blocker, blocked, block_level)
147
148                         for entry in blockdict:
149                             if entry["blocked"] == blocked:
150                                 # NOISY-DEBUG: print("DEBUG: Updating entry reason:", blocked)
151                                 entry["reason"] = reason["reason"]
152
153             fba.connection.commit()
154         except Exception as e:
155             print("error:", e, blocker, software)
156     elif software == "mastodon":
157         print("INFO: blocker:", blocker)
158         try:
159             # json endpoint for newer mastodongs
160             try:
161                 json = {
162                     "reject": [],
163                     "media_removal": [],
164                     "followers_only": [],
165                     "report_removal": []
166                 }
167
168                 # handling CSRF, I've saw at least one server requiring it to access the endpoint
169                 # NOISY-DEBUG: print("DEBUG: Fetching meta:", blocker)
170                 meta = bs4.BeautifulSoup(
171                     reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).text,
172                     "html.parser",
173                 )
174                 try:
175                     csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
176                     # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf)
177                     reqheaders = {**fba.headers, **{"x-csrf-token": csrf}}
178                 except:
179                     # NOISY-DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker)
180                     reqheaders = fba.headers
181
182                 # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker)
183                 blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json()
184
185                 # NOISY-DEBUG: print("DEBUG: blocks():", len(blocks))
186                 for block in blocks:
187                     entry = {
188                         'domain': block['domain'],
189                         'hash': block['digest'],
190                         'reason': block['comment']
191                     }
192
193                     # NOISY-DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
194                     if block['severity'] == 'suspend':
195                         json['reject'].append(entry)
196                     elif block['severity'] == 'silence':
197                         json['followers_only'].append(entry)
198                     elif block['severity'] == 'reject_media':
199                         json['media_removal'].append(entry)
200                     elif block['severity'] == 'reject_reports':
201                         json['report_removal'].append(entry)
202                     else:
203                         print("WARNING: Unknown severity:", block['severity'], block['domain'])
204             except:
205                 # NOISY-DEBUG: print("DEBUG: Failed, Trying mastodon-specific fetches:", blocker)
206                 json = fba.get_mastodon_blocks(blocker)
207
208             # NOISY-DEBUG: print("DEBUG: json.items():", blocker, len(json.items()))
209             for block_level, blocks in json.items():
210                 # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
211                 block_level = fba.tidyup(block_level)
212                 # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
213                 if block_level == "":
214                     print("WARNING: block_level is empty, blocker:", blocker)
215                     continue
216
217                 for instance in blocks:
218                     blocked, blocked_hash, reason = instance.values()
219                     # NOISY-DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)
220                     blocked = fba.tidyup(blocked)
221                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
222
223                     if blocked == "":
224                         print("WARNING: blocked is empty:", blocker)
225                         continue
226                     elif blocked.count("*") < 1:
227                         # No obsfucation for this instance
228                         fba.cursor.execute(
229                             "SELECT hash FROM instances WHERE domain = ? LIMIT 1", [blocked]
230                         )
231
232                         if fba.cursor.fetchone() == None:
233                             # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
234                             fba.add_instance(blocked, blocker, argv[0])
235                     else:
236                         # Doing the hash search for instance names as well to tidy up DB
237                         fba.cursor.execute(
238                             "SELECT domain FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
239                         )
240                         searchres = fba.cursor.fetchone()
241
242                         if searchres != None:
243                             # NOISY-DEBUG: print("DEBUG: Updating domain: ", searchres[0])
244                             blocked = searchres[0]
245
246                     fba.cursor.execute(
247                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
248                         (
249                             blocker,
250                             blocked if blocked.count("*") <= 1 else blocked_hash,
251                             block_level
252                         ),
253                     )
254
255                     if fba.cursor.fetchone() == None:
256                         fba.block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level)
257
258                         if block_level == "reject":
259                             blockdict.append(
260                                 {
261                                     "blocked": blocked,
262                                     "reason": reason
263                                 })
264                     else:
265                         fba.update_last_seen(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
266
267                     if reason != '':
268                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
269                         fba.update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
270
271             fba.connection.commit()
272         except Exception as e:
273             print("error:", e, blocker, software)
274     elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe":
275         print("INFO: blocker:", blocker)
276         try:
277             if software == "friendica":
278                 json = fba.get_friendica_blocks(blocker)
279             elif software == "misskey":
280                 json = fba.get_misskey_blocks(blocker)
281             elif software == "bookwyrm":
282                 print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker)
283                 #json = fba.get_bookwyrm_blocks(blocker)
284             elif software == "takahe":
285                 print("WARNING: takahe is not fully supported for fetching blacklist!", blocker)
286                 #json = fba.get_takahe_blocks(blocker)
287
288             for block_level, blocks in json.items():
289                 # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
290                 block_level = fba.tidyup(block_level)
291                 # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
292                 if block_level == "":
293                     print("WARNING: block_level is empty, blocker:", blocker)
294                     continue
295
296                 for instance in blocks:
297                     blocked, reason = instance.values()
298                     # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
299                     blocked = fba.tidyup(blocked)
300                     # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
301
302                     if blocked == "":
303                         print("WARNING: blocked is empty:", blocker)
304                         continue
305                     if blocked.count("*") > 0:
306                         # Some friendica servers also obscure domains without hash
307                         fba.cursor.execute(
308                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
309                         )
310                         searchres = fba.cursor.fetchone()
311                         if searchres != None:
312                             blocked = searchres[0]
313
314                     if blocked.count("?") > 0:
315                         # Some obscure them with question marks, not sure if that's dependent on version or not
316                         fba.cursor.execute(
317                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
318                         )
319                         searchres = fba.cursor.fetchone()
320                         if searchres != None:
321                             blocked = searchres[0]
322
323                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
324                     fba.cursor.execute(
325                         "SELECT domain FROM instances WHERE domain = ? LIMIT 1", [blocked]
326                     )
327
328                     if fba.cursor.fetchone() == None:
329                         # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
330                         fba.add_instance(blocked, blocker)
331
332                     fba.cursor.execute(
333                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?",
334                         (blocker, blocked),
335                     )
336                     if fba.cursor.fetchone() == None:
337                         fba.block_instance(blocker, blocked, reason, block_level)
338
339                         if block_level == "reject":
340                             blockdict.append(
341                                 {
342                                     "blocked": blocked,
343                                     "reason": reason
344                                 })
345                     else:
346                         fba.update_last_seen(blocker, blocked, block_level)
347
348                     if reason != '':
349                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
350                         fba.update_block_reason(reason, blocker, blocked, block_level)
351
352             fba.connection.commit()
353         except Exception as e:
354             print("error:", e, blocker, software)
355     elif software == "gotosocial":
356         print("INFO: blocker:", blocker)
357         try:
358             # Blocks
359             federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json()
360
361             if (federation == None):
362                 print("WARNING: No valid response:", blocker);
363             elif "error" in federation:
364                 print("WARNING: API returned error:", federation["error"])
365             else:
366                 # NOISY-DEBUG: print("DEBUG: Checking fenderation():", len(federation))
367                 for peer in federation:
368                     blocked = peer["domain"].lower()
369                     # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
370                     blocked = fba.tidyup(blocked)
371                     # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
372
373                     if blocked == "":
374                         print("WARNING: blocked is empty:", blocker)
375                         continue
376                     elif blocked.count("*") > 0:
377                         # GTS does not have hashes for obscured domains, so we have to guess it
378                         fba.cursor.execute(
379                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
380                         )
381                         searchres = fba.cursor.fetchone()
382
383                         if searchres != None:
384                             blocked = searchres[0]
385
386                     fba.cursor.execute(
387                         "SELECT domain FROM instances WHERE domain = ? LIMIT 1", [blocked]
388                     )
389
390                     if fba.cursor.fetchone() == None:
391                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
392                         fba.update_block_reason(reason, blocker, blocked, block_level)
393
394                     fba.cursor.execute(
395                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
396                         (
397                             blocker,
398                             blocked,
399                             "reject"
400                         ),
401                     )
402
403                     if fba.cursor.fetchone() == None:
404                         fba.block_instance(blocker, blocked, "unknown", "reject")
405
406                         blockdict.append(
407                             {
408                                 "blocked": blocked,
409                                 "reason": None
410                             })
411                     else:
412                         fba.update_last_seen(blocker, blocked, "reject")
413
414                     if "public_comment" in peer:
415                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"])
416                         fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject")
417
418                         for entry in blockdict:
419                             if entry["blocked"] == blocked:
420                                 entry["reason"] = peer["public_comment"]
421
422                 fba.connection.commit()
423         except Exception as e:
424             print("error:", e, blocker, software)
425     else:
426         print("WARNING: Unknown software:", blocker, software)
427
428     if fba.config["bot_enabled"] and len(blockdict) > 0:
429         send_bot_post(blocker, blockdict)
430
431     blockdict = []
432
433 fba.connection.close()