]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
a4248ac3c0e1dc74713904049422964496ed3d82
[fba.git] / fetch_blocks.py
import itertools
import re
import sys
import time

import bs4
import reqto

import fba
7
# Fetches the block lists of all known pleroma, mastodon, friendica, misskey
# and gotosocial instances that are due for a re-check and stores them through
# the fba module. Newly found "reject" blocks are collected per blocker and
# handed to the bot when it is enabled.


def _guess_domain(blocked, wildcard="*"):
    """Resolve an obscured domain against the instances table.

    Every `wildcard` character in `blocked` is replaced by the SQL LIKE
    single-character placeholder "_". Returns the first matching domain
    (lowest rowid), or `blocked` unchanged when nothing matches.
    """
    fba.c.execute(
        "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1",
        [blocked.replace(wildcard, "_")],
    )
    row = fba.c.fetchone()
    return blocked if row is None else row[0]


def _ensure_instance(blocked, blocker):
    """Call fba.add_instance() for `blocked` unless its domain is already stored.

    The script name (sys.argv[0]) is passed along as third argument.
    """
    fba.c.execute("SELECT domain FROM instances WHERE domain = ?", [blocked])
    if fba.c.fetchone() is None:
        fba.add_instance(blocked, blocker, sys.argv[0])


def _record_block(blocker, blocked, reason, block_level):
    """Store one block, or refresh it when it is already known.

    Looks the (blocker, blocked, block_level) triple up in the blocks table.
    When missing, fba.block_instance() is called and True is returned;
    otherwise fba.update_last_seen() is called and False is returned.
    """
    timestamp = int(time.time())
    fba.c.execute(
        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
        (blocker, blocked, block_level),
    )
    if fba.c.fetchone() is None:
        fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
        return True

    fba.update_last_seen(timestamp, blocker, blocked, block_level)
    return False


def _fetch_pleroma_blocks(blocker, blockdict):
    """Store blocks and block reasons published in a Pleroma nodeinfo document.

    New "reject" blocks are appended to `blockdict` (mutated in place).
    """
    nodeinfo = fba.fetch_nodeinfo(blocker)
    if nodeinfo is None:
        print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
        return

    federation = nodeinfo["metadata"]["federation"]

    # NOTE(review): this skips every instance whose federation metadata has an
    # "enabled" key, whatever its value. Confirm that this is the intended
    # "no block list to analyze" condition.
    if "enabled" in federation:
        return

    # Blocks
    if "mrf_simple" in federation:
        levels = {
            **federation["mrf_simple"],
            # Not every instance publishes its quarantine list.
            "quarantined_instances": federation.get("quarantined_instances", []),
        }

        for block_level, blocks in levels.items():
            block_level = fba.tidyup(block_level)
            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue

            for blocked in blocks:
                blocked = fba.tidyup(blocked)
                if blocked == "":
                    print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                    continue

                if blocked.count("*") > 1:
                    # -ACK!-oma also started obscuring domains without hash
                    blocked = _guess_domain(blocked)

                _ensure_instance(blocked, blocker)

                is_new = _record_block(blocker, blocked, "unknown", block_level)
                if is_new and block_level == "reject":
                    blockdict.append({"blocked": blocked, "reason": None})

    fba.conn.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        infos = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }

        for block_level, info in infos.items():
            block_level = fba.tidyup(block_level)
            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue

            for blocked, reason in info.items():
                blocked = fba.tidyup(blocked)
                if blocked == "":
                    print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                    continue

                if blocked.count("*") > 1:
                    # same domain guess as above, but for reasons field
                    blocked = _guess_domain(blocked)

                fba.update_block_reason(reason["reason"], blocker, blocked, block_level)

                for entry in blockdict:
                    if entry["blocked"] == blocked:
                        entry["reason"] = reason["reason"]

    fba.conn.commit()


def _query_mastodon_api(blocker):
    """Fetch /api/v1/instance/domain_blocks and group the entries by block level.

    Returns a dict mapping "reject", "media_removal", "followers_only" and
    "report_removal" to lists of {"domain", "hash", "reason"} dicts. Any
    request or parsing problem is raised to the caller.
    """
    levels = {
        "reject": [],
        "media_removal": [],
        "followers_only": [],
        "report_removal": []
    }
    severity_to_level = {
        "suspend": "reject",
        "silence": "followers_only",
        "reject_media": "media_removal",
        "reject_reports": "report_removal",
    }

    # handling CSRF, at least one server was seen requiring it to access the endpoint
    meta = bs4.BeautifulSoup(
        reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=fba.config["timeout"]).text,
        "html.parser",
    )
    try:
        csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
        reqheaders = {**fba.headers, "x-csrf-token": csrf}
    except (TypeError, KeyError):
        # No <meta name="csrf-token"> tag, or it has no content attribute.
        reqheaders = fba.headers

    blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=fba.config["timeout"]).json()

    for block in blocks:
        # Key order matters: consumers unpack these entries with .values().
        entry = {
            'domain': block['domain'],
            'hash': block['digest'],
            'reason': block['comment']
        }

        level = severity_to_level.get(block['severity'])
        if level is None:
            print("WARNING: Unknown severity:", block['severity'], block['domain'])
        else:
            levels[level].append(entry)

    return levels


def _fetch_mastodon_blocks(blocker, blockdict):
    """Store the blocks of a Mastodon instance.

    Tries the JSON endpoint of newer Mastodon versions first and falls back to
    fba.get_mastodon_blocks() when that fails for any reason. New "reject"
    blocks are appended to `blockdict` (mutated in place).
    """
    try:
        levels = _query_mastodon_api(blocker)
    except Exception:
        levels = fba.get_mastodon_blocks(blocker)

    for block_level, blocks in levels.items():
        block_level = fba.tidyup(block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        for instance in blocks:
            # Relies on each entry holding exactly domain, hash, reason in that order.
            blocked, blocked_hash, reason = instance.values()
            blocked = fba.tidyup(blocked)

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue

            if "*" not in blocked:
                # No obfuscation for this instance
                _ensure_instance(blocked, blocker)
            else:
                # Doing the hash search for instance names as well to tidy up DB
                fba.c.execute(
                    "SELECT domain FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                )
                row = fba.c.fetchone()
                if row is not None:
                    blocked = row[0]

            # Names that are still heavily obscured are stored by their hash instead.
            stored = blocked if blocked.count("*") <= 1 else blocked_hash

            is_new = _record_block(blocker, stored, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append({"blocked": blocked, "reason": reason})

            if reason != '':
                fba.update_block_reason(reason, blocker, stored, block_level)

    fba.conn.commit()


def _store_block_lists(blocker, levels, blockdict):
    """Store block lists shaped like {block_level: [{domain, reason}, ...]}.

    Shared by the Friendica and Misskey handlers. New "reject" blocks are
    appended to `blockdict` (mutated in place).
    """
    for block_level, blocks in levels.items():
        block_level = fba.tidyup(block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        for instance in blocks:
            # Relies on each entry holding exactly two values: domain, then reason.
            blocked, reason = instance.values()
            blocked = fba.tidyup(blocked)

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue

            if "*" in blocked:
                # Some friendica servers also obscure domains without hash
                blocked = _guess_domain(blocked)

            if "?" in blocked:
                # Some obscure them with question marks, not sure if that's dependent on version or not
                blocked = _guess_domain(blocked, "?")

            _ensure_instance(blocked, blocker)

            is_new = _record_block(blocker, blocked, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append({"blocked": blocked, "reason": reason})

            if reason != '':
                fba.update_block_reason(reason, blocker, blocked, block_level)

    fba.conn.commit()


def _fetch_friendica_blocks(blocker, blockdict):
    """Store the blocks returned by fba.get_friendica_blocks()."""
    _store_block_lists(blocker, fba.get_friendica_blocks(blocker), blockdict)


def _fetch_misskey_blocks(blocker, blockdict):
    """Store the blocks returned by fba.get_misskey_blocks()."""
    _store_block_lists(blocker, fba.get_misskey_blocks(blocker), blockdict)


def _fetch_gotosocial_blocks(blocker, blockdict):
    """Store the suspended peers of a GoToSocial instance as "reject" blocks.

    New blocks are appended to `blockdict` (mutated in place).
    """
    federation = reqto.get(f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=fba.config["timeout"]).json()

    if federation is None:
        print("WARNING: No valid response:", blocker)
        return

    if "error" in federation:
        print("WARNING: API returned error:", federation["error"])
        return

    for peer in federation:
        blocked = fba.tidyup(peer["domain"].lower())

        if blocked == "":
            print("WARNING: blocked is empty:", blocker)
            continue

        if "*" in blocked:
            # GTS does not have hashes for obscured domains, so we have to guess it
            blocked = _guess_domain(blocked)

        _ensure_instance(blocked, blocker)

        if _record_block(blocker, blocked, "unknown", "reject"):
            blockdict.append({"blocked": blocked, "reason": None})

        if "public_comment" in peer:
            reason = peer["public_comment"]
            fba.update_block_reason(reason, blocker, blocked, "reject")

            for entry in blockdict:
                if entry["blocked"] == blocked:
                    entry["reason"] = reason

    fba.conn.commit()


# Maps the software column of the instances table to its handler.
_HANDLERS = {
    "pleroma": _fetch_pleroma_blocks,
    "mastodon": _fetch_mastodon_blocks,
    "friendica": _fetch_friendica_blocks,
    "misskey": _fetch_misskey_blocks,
    "gotosocial": _fetch_gotosocial_blocks,
}

fba.c.execute(
    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]]
)

# fetchall() materialises the result first, so the handlers may reuse fba.c.
for blocker, software in fba.c.fetchall():
    # Newly found "reject" blocks of this blocker, handed to the bot afterwards.
    blockdict = []
    blocker = fba.tidyup(blocker)

    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue

    fba.update_last_blocked(blocker)

    handler = _HANDLERS.get(software)
    if handler is None:
        print("WARNING: Unknown software:", software)
    else:
        print("INFO: blocker:", blocker)
        try:
            handler(blocker, blockdict)
        except Exception as e:
            # One broken instance must not abort the whole run.
            print("error:", e, blocker, software)

    if fba.config["bot_enabled"] and blockdict:
        # NOTE(review): send_bot_post is assumed to live in the fba module like
        # the other shared helpers; this file does not define it. Confirm.
        fba.send_bot_post(blocker, blockdict)

fba.conn.close()