]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
import itertools
import re
import sys
import time

import bs4
import reqto

import fba
7
# Select every instance running one of the listed software types whose
# last_blocked column is NULL or older than the configured "recheck_block"
# interval, highest rowid first.
recheck_before = time.time() - fba.config["recheck_block"]
fba.cursor.execute(
    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC",
    [recheck_before],
)

rows = fba.cursor.fetchall()
print(f"INFO: Checking {len(rows)} entries ...")
# Maps the "severity" value of an /api/v1/instance/domain_blocks entry to the
# block level name it is stored under.
_MASTODON_SEVERITY_LEVELS = {
    "suspend": "reject",
    "silence": "followers_only",
    "reject_media": "media_removal",
    "reject_reports": "report_removal",
}


def _request_timeout():
    """Return the (connect, read) timeout tuple used for all HTTP requests."""
    return (fba.config["connection_timeout"], fba.config["read_timeout"])


def _lookup_obscured_domain(blocked, wildcard="*"):
    """Try to resolve an obscured domain name against the instances table.

    Every `wildcard` character in `blocked` is replaced by the SQL LIKE
    single-character placeholder "_" and the first matching domain (lowest
    rowid) is returned. When nothing matches, `blocked` is returned as-is.
    """
    fba.cursor.execute(
        "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )
    row = fba.cursor.fetchone()
    return blocked if row is None else row[0]


def _store_block(blocker, blocked, reason, block_level):
    """Insert a block row if it is unknown, otherwise refresh its last_seen.

    Returns True when the block was newly inserted, False when an existing
    row for (blocker, blocked, block_level) was found and refreshed.
    """
    fba.cursor.execute(
        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
        (
            blocker,
            blocked,
            block_level
        ),
    )

    if fba.cursor.fetchone() is None:
        fba.block_instance(blocker, blocked, reason, block_level)
        return True

    fba.update_last_seen(blocker, blocked, block_level)
    return False


def _fetch_pleroma_blocks(blocker, blockdict):
    """Record the blocks a Pleroma instance publishes in its nodeinfo.

    Reads metadata.federation from the nodeinfo document: "mrf_simple" (plus
    "quarantined_instances") for the blocks and "mrf_simple_info" (plus
    "quarantined_instances_info") for the reasons. Newly found "reject"
    blocks are appended to `blockdict`.
    """
    nodeinfo = fba.fetch_nodeinfo(blocker)
    if nodeinfo is None:
        print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
        return

    print("DEBUG: Updating nodeinfo:", blocker)
    fba.update_last_nodeinfo(blocker)

    federation = nodeinfo["metadata"]["federation"]

    if "enabled" in federation:
        # Per the original debug note, such instances have no block list to
        # analyze.
        return

    # Blocks
    if "mrf_simple" in federation:
        # "quarantined_instances" is optional; a missing key must not abort
        # the whole instance.
        levels = {
            **federation["mrf_simple"],
            "quarantined_instances": federation.get("quarantined_instances", []),
        }

        for block_level, blocks in levels.items():
            block_level = fba.tidyup(block_level)

            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue

            for blocked in blocks:
                blocked = fba.tidyup(blocked)

                if blocked == "":
                    print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                    continue

                if blocked.count("*") > 1:
                    # -ACK!-oma also started obscuring domains without hash
                    blocked = _lookup_obscured_domain(blocked)

                if not fba.is_instance_registered(blocked):
                    # The original passed an undefined bare `argv[0]` here.
                    fba.add_instance(blocked, blocker, sys.argv[0])

                if _store_block(blocker, blocked, "unknown", block_level) and block_level == "reject":
                    blockdict.append(
                        {
                            "blocked": blocked,
                            "reason": None
                        })

    fba.connection.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        infos = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }

        for block_level, info in infos.items():
            block_level = fba.tidyup(block_level)

            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue

            for blocked, reason in info.items():
                blocked = fba.tidyup(blocked)

                if blocked == "":
                    print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                    continue
                elif blocked.count("*") > 1:
                    # same domain guess as above, but for reasons field
                    blocked = _lookup_obscured_domain(blocked)

                fba.update_block_reason(reason["reason"], blocker, blocked, block_level)

                for entry in blockdict:
                    if entry["blocked"] == blocked:
                        entry["reason"] = reason["reason"]

    fba.connection.commit()


def _fetch_mastodon_api_blocks(blocker):
    """Fetch a Mastodon instance's blocks from its JSON API, grouped by level.

    Returns a dict mapping block level to a list of
    {"domain", "hash", "reason"} entries. Any failure is raised to the caller.
    """
    levels = {
        "reject": [],
        "media_removal": [],
        "followers_only": [],
        "report_removal": []
    }

    # handling CSRF, at least one server was seen requiring it to access the endpoint
    meta = bs4.BeautifulSoup(
        reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=_request_timeout()).text,
        "html.parser",
    )
    try:
        csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
        reqheaders = {**fba.headers, "x-csrf-token": csrf}
    except (TypeError, KeyError):
        # No <meta name="csrf-token"> tag (find() returned None) or the tag
        # has no "content" attribute: use the normal headers.
        reqheaders = fba.headers

    blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=_request_timeout()).json()

    for block in blocks:
        entry = {
            'domain': block['domain'],
            'hash': block['digest'],
            'reason': block['comment']
        }

        level = _MASTODON_SEVERITY_LEVELS.get(block['severity'])
        if level is None:
            print("WARNING: Unknown severity:", block['severity'], block['domain'])
        else:
            levels[level].append(entry)

    return levels


def _fetch_mastodon_blocks(blocker, blockdict):
    """Record the blocks of a Mastodon instance.

    Tries the JSON endpoint of newer Mastodon versions first and falls back to
    fba.get_mastodon_blocks() when that fails. Newly found "reject" blocks are
    appended to `blockdict`.
    """
    try:
        levels = _fetch_mastodon_api_blocks(blocker)
    except Exception:
        # Deliberate best-effort: any API failure means "use the fallback".
        levels = fba.get_mastodon_blocks(blocker)

    for block_level, blocks in levels.items():
        block_level = fba.tidyup(block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        for instance in blocks:
            # Relies on each entry holding exactly domain, hash, reason in
            # that order (as built by _fetch_mastodon_api_blocks()).
            # NOTE(review): presumably fba.get_mastodon_blocks() returns the
            # same shape - confirm.
            blocked, blocked_hash, reason = instance.values()
            blocked = fba.tidyup(blocked)

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue
            elif blocked.count("*") < 1:
                # No obfuscation for this instance
                fba.cursor.execute(
                    "SELECT hash FROM instances WHERE domain = ? LIMIT 1", [blocked]
                )

                if fba.cursor.fetchone() is None:
                    # The original passed an undefined bare `argv[0]` here.
                    fba.add_instance(blocked, blocker, sys.argv[0])
            else:
                # Doing the hash search for instance names as well to tidy up DB
                fba.cursor.execute(
                    "SELECT domain FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                )
                searchres = fba.cursor.fetchone()

                if searchres is not None:
                    blocked = searchres[0]

            # Names still containing more than one "*" are stored by hash.
            block_key = blocked if blocked.count("*") <= 1 else blocked_hash

            if _store_block(blocker, block_key, reason, block_level) and block_level == "reject":
                blockdict.append(
                    {
                        "blocked": blocked,
                        "reason": reason
                    })

            if reason != "":
                fba.update_block_reason(reason, blocker, block_key, block_level)

    fba.connection.commit()


def _fetch_generic_blocks(blocker, software, blockdict):
    """Record the blocks of a friendica, misskey, bookwyrm or takahe instance.

    bookwyrm and takahe have no fetcher yet; they only produce a warning and
    an empty block list. Newly found "reject" blocks are appended to
    `blockdict`.
    """
    # Start from an empty mapping so unsupported software can never reuse the
    # block list fetched for a previous blocker.
    levels = {}

    if software == "friendica":
        levels = fba.get_friendica_blocks(blocker)
    elif software == "misskey":
        levels = fba.get_misskey_blocks(blocker)
    elif software == "bookwyrm":
        print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker)
    elif software == "takahe":
        print("WARNING: takahe is not fully supported for fetching blacklist!", blocker)

    for block_level, blocks in levels.items():
        block_level = fba.tidyup(block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        for instance in blocks:
            # Relies on each entry holding exactly two values: domain, reason.
            blocked, reason = instance.values()
            blocked = fba.tidyup(blocked)

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue

            if "*" in blocked:
                # Some friendica servers also obscure domains without hash
                blocked = _lookup_obscured_domain(blocked, "*")

            if "?" in blocked:
                # Some obscure them with question marks, not sure if that's dependent on version or not
                blocked = _lookup_obscured_domain(blocked, "?")

            if not fba.is_instance_registered(blocked):
                # NOTE(review): called with two arguments here but with three
                # (plus the script name) in the pleroma/mastodon paths - confirm
                # which signature fba.add_instance() expects.
                fba.add_instance(blocked, blocker)

            # NOTE(review): unlike the other software types this lookup does
            # not filter by block_level - confirm whether that is intended.
            fba.cursor.execute(
                "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?",
                (blocker, blocked),
            )

            if fba.cursor.fetchone() is None:
                fba.block_instance(blocker, blocked, reason, block_level)

                if block_level == "reject":
                    blockdict.append(
                        {
                            "blocked": blocked,
                            "reason": reason
                        })
            else:
                fba.update_last_seen(blocker, blocked, block_level)

            if reason != '':
                fba.update_block_reason(reason, blocker, blocked, block_level)

    fba.connection.commit()


def _fetch_gotosocial_blocks(blocker, blockdict):
    """Record the suspended peers of a GoToSocial instance as "reject" blocks.

    Every newly found block is appended to `blockdict`; its reason is filled
    in from "public_comment" when the peer entry carries one.
    """
    # NOTE(review): `get_peers_url` was an undefined bare name in this script;
    # it is assumed to live in the fba module like `config` - confirm.
    federation = reqto.get(f"https://{blocker}{fba.get_peers_url}?filter=suspended", headers=fba.headers, timeout=_request_timeout()).json()

    if federation is None:
        print("WARNING: No valid response:", blocker)
    elif "error" in federation:
        print("WARNING: API returned error:", federation["error"])
    else:
        for peer in federation:
            blocked = peer["domain"].lower()
            blocked = fba.tidyup(blocked)

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue
            elif "*" in blocked:
                # GTS does not have hashes for obscured domains, so we have to guess it
                blocked = _lookup_obscured_domain(blocked)

            if not fba.is_instance_registered(blocked):
                # NOTE(review): two-argument call, see the pleroma/mastodon
                # paths for the three-argument form - confirm the signature.
                fba.add_instance(blocked, blocker)

            if _store_block(blocker, blocked, "unknown", "reject"):
                blockdict.append(
                    {
                        "blocked": blocked,
                        "reason": None
                    })

            if "public_comment" in peer:
                fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject")

                for entry in blockdict:
                    if entry["blocked"] == blocked:
                        entry["reason"] = peer["public_comment"]

        fba.connection.commit()


# Walk over all selected instances, fetch their block lists according to the
# software they run and, when the bot is enabled, post newly found "reject"
# blocks.
for blocker, software in rows:
    # Newly discovered "reject" blocks of this blocker, for the bot post.
    blockdict = []
    blocker = fba.tidyup(blocker)

    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue
    elif fba.is_blacklisted(blocker):
        print(f"WARNING: blocker='{blocker}' is blacklisted now!")
        continue

    fba.update_last_blocked(blocker)

    if software in ("pleroma", "mastodon", "friendica", "misskey", "bookwyrm", "takahe", "gotosocial"):
        print("INFO: blocker:", blocker)
        try:
            if software == "pleroma":
                _fetch_pleroma_blocks(blocker, blockdict)
            elif software == "mastodon":
                _fetch_mastodon_blocks(blocker, blockdict)
            elif software == "gotosocial":
                _fetch_gotosocial_blocks(blocker, blockdict)
            else:
                _fetch_generic_blocks(blocker, software, blockdict)
        except Exception as e:
            # One failing instance must not stop the whole run.
            print(f"ERROR: blocker='{blocker}',software='{software}',exception='{e}'")
    else:
        print("WARNING: Unknown software:", blocker, software)

    if fba.config["bot_enabled"] and len(blockdict) > 0:
        # NOTE(review): `send_bot_post` was an undefined bare name in this
        # script; it is assumed to live in the fba module - confirm.
        fba.send_bot_post(blocker, blockdict)

fba.connection.close()