]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
1 import reqto
2 import time
3 import bs4
4 import fba
5 import itertools
6 import re
7
# Crawl every known instance running a supported software, read its public
# block list and mirror it into the local `blocks` table. Newly seen "reject"
# blocks are collected per blocker and, when the bot is enabled, announced.

# Mastodon API severity -> block level name used in the database.
_MASTODON_SEVERITY_LEVELS = {
    "suspend": "reject",
    "silence": "followers_only",
    "reject_media": "media_removal",
    "reject_reports": "report_removal",
}


def _deobscure(domain: str, wildcard: str = "*") -> str:
    """Guess the real domain behind an obscured block list entry.

    Each *wildcard* character is replaced by the SQL ``LIKE`` single-character
    placeholder ``_`` and the matching instance with the lowest rowid is
    returned. When no instance matches, *domain* is returned unchanged.
    """
    fba.c.execute(
        "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1",
        (domain.replace(wildcard, "_"),),
    )
    row = fba.c.fetchone()
    return domain if row is None else row[0]


def _ensure_instance(domain: str) -> None:
    """Add *domain* to the instances table unless a row for it already exists."""
    fba.c.execute(
        "SELECT domain FROM instances WHERE domain = ?", (domain,)
    )
    if fba.c.fetchone() is None:
        fba.add_instance(domain)


def _record_block(blocker: str, blocked: str, reason, block_level: str) -> bool:
    """Insert a block or refresh its last-seen timestamp.

    The lookup always includes *block_level*, so the same blocker/blocked pair
    is tracked separately per level.

    Returns True when a new block row was created, False when an existing row
    only had its last-seen timestamp updated.
    """
    timestamp = int(time.time())
    fba.c.execute(
        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
        (blocker, blocked, block_level),
    )
    if fba.c.fetchone() is None:
        fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
        return True

    fba.update_last_seen(timestamp, blocker, blocked, block_level)
    return False


fba.c.execute(
    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial') ORDER BY rowid DESC"
)

for blocker, software in fba.c.fetchall():
    # New "reject" blocks found for this blocker; handed to the bot at the end.
    blockdict = []
    blocker = fba.tidyup(blocker)

    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue

    if software == "pleroma":
        print("INFO: blocker:", blocker)
        try:
            # Blocks
            nodeinfo = fba.fetch_nodeinfo(blocker)
            if nodeinfo is None:
                print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
                continue

            federation = nodeinfo["metadata"]["federation"]

            # NOTE(review): every instance whose federation block contains an
            # "enabled" key is skipped here, whatever its value. Confirm this
            # is intended; behaviour is kept as-is.
            if "enabled" in federation:
                continue

            if "mrf_simple" in federation:
                # Quarantined instances are handled like one more block level.
                # The key may be missing, so fall back to an empty list rather
                # than aborting the whole instance with a KeyError.
                block_lists = {
                    **federation["mrf_simple"],
                    "quarantined_instances": federation.get("quarantined_instances", []),
                }

                for block_level, blocks in block_lists.items():
                    block_level = fba.tidyup(block_level)

                    if block_level == "":
                        print("WARNING: block_level is now empty!")
                        continue

                    for blocked in blocks:
                        blocked = fba.tidyup(blocked)

                        if blocked == "":
                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                            continue

                        if blocked.count("*") > 1:
                            # Obscured domain without a hash: guess it
                            blocked = _deobscure(blocked)

                        _ensure_instance(blocked)

                        is_new = _record_block(blocker, blocked, "unknown", block_level)
                        if is_new and block_level == "reject":
                            blockdict.append(
                                {
                                    "blocked": blocked,
                                    "reason": None
                                })

            fba.conn.commit()

            # Reasons
            if "mrf_simple_info" in federation:
                reason_lists = {
                    **federation["mrf_simple_info"],
                    **federation.get("quarantined_instances_info", {}),
                }

                for block_level, info in reason_lists.items():
                    block_level = fba.tidyup(block_level)

                    if block_level == "":
                        print("WARNING: block_level is now empty!")
                        continue

                    for blocked, reason in info.items():
                        blocked = fba.tidyup(blocked)

                        if blocked == "":
                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                            continue

                        if blocked.count("*") > 1:
                            # Same domain guess as for the block list itself
                            blocked = _deobscure(blocked)

                        fba.update_block_reason(reason["reason"], blocker, blocked, block_level)

                        # Keep the pending bot announcement in sync
                        for entry in blockdict:
                            if entry["blocked"] == blocked:
                                entry["reason"] = reason["reason"]

            fba.conn.commit()
        except Exception as e:
            print("error:", e, blocker, software)
    elif software == "mastodon":
        print("INFO: blocker:", blocker)
        try:
            # JSON endpoint of newer Mastodon versions; anything going wrong
            # in here falls back to fba.get_mastodon_blocks() below.
            try:
                blocklists = {
                    "reject": [],
                    "media_removal": [],
                    "followers_only": [],
                    "report_removal": []
                }

                # Some servers require a CSRF token to access the endpoint,
                # so try to pick one up from the about page first.
                meta = bs4.BeautifulSoup(
                    reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=5).text,
                    "html.parser",
                )
                try:
                    csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
                    reqheaders = {**fba.headers, "x-csrf-token": csrf}
                except Exception:
                    # No usable token found, go on with the normal headers
                    reqheaders = fba.headers

                blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5).json()

                for block in blocks:
                    # Key order matters: entries are unpacked positionally
                    # (domain, hash, reason) further down.
                    entry = {
                        'domain': block['domain'],
                        'hash': block['digest'],
                        'reason': block['comment']
                    }

                    level = _MASTODON_SEVERITY_LEVELS.get(block['severity'])
                    if level is None:
                        print("WARNING: Unknown severity:", block['severity'], block['domain'])
                    else:
                        blocklists[level].append(entry)
            except Exception:
                # `except Exception` instead of a bare `except` so that
                # Ctrl-C / SystemExit are not swallowed by the fallback.
                blocklists = fba.get_mastodon_blocks(blocker)

            for block_level, blocks in blocklists.items():
                block_level = fba.tidyup(block_level)

                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for instance in blocks:
                    blocked, blocked_hash, reason = instance.values()
                    blocked = fba.tidyup(blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif "*" not in blocked:
                        # No obfuscation for this instance
                        _ensure_instance(blocked)
                    else:
                        # Look the domain up by its hash to tidy up the DB
                        fba.c.execute(
                            "SELECT domain FROM instances WHERE hash = ? LIMIT 1", (blocked_hash,)
                        )
                        searchres = fba.c.fetchone()

                        if searchres is not None:
                            blocked = searchres[0]

                    # Entries that are still heavily obscured are stored by hash
                    target = blocked if blocked.count("*") <= 1 else blocked_hash

                    is_new = _record_block(blocker, target, reason, block_level)
                    if is_new and block_level == "reject":
                        blockdict.append(
                            {
                                "blocked": blocked,
                                "reason": reason
                            })

                    if reason != '':
                        fba.update_block_reason(reason, blocker, target, block_level)

            fba.conn.commit()
        except Exception as e:
            print("error:", e, blocker, software)
    elif software == "friendica" or software == "misskey":
        print("INFO: blocker:", blocker)
        try:
            if software == "friendica":
                blocklists = fba.get_friendica_blocks(blocker)
            else:
                blocklists = fba.get_misskey_blocks(blocker)

            for block_level, blocks in blocklists.items():
                block_level = fba.tidyup(block_level)

                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for instance in blocks:
                    blocked, reason = instance.values()
                    blocked = fba.tidyup(blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue

                    if "*" in blocked:
                        # Some friendica servers also obscure domains without hash
                        blocked = _deobscure(blocked)

                    if "?" in blocked:
                        # Some obscure them with question marks instead
                        blocked = _deobscure(blocked, "?")

                    _ensure_instance(blocked)

                    # The lookup includes block_level (it did not before), so a
                    # pair blocked at several levels gets one row per level,
                    # consistent with the other branches.
                    is_new = _record_block(blocker, blocked, reason, block_level)
                    if is_new and block_level == "reject":
                        blockdict.append(
                            {
                                "blocked": blocked,
                                "reason": reason
                            })

                    if reason != '':
                        fba.update_block_reason(reason, blocker, blocked, block_level)

            fba.conn.commit()
        except Exception as e:
            print("error:", e, blocker, software)
    elif software == "gotosocial":
        print("INFO: blocker:", blocker)
        try:
            # Blocks
            federation = reqto.get(f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=5).json()

            if federation is None:
                print("WARNING: No valid response:", blocker)
            elif "error" in federation:
                print("WARNING: API returned error:", federation["error"])
            else:
                for peer in federation:
                    blocked = fba.tidyup(peer["domain"].lower())

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue

                    if "*" in blocked:
                        # GTS does not have hashes for obscured domains, so we have to guess it
                        blocked = _deobscure(blocked)

                    _ensure_instance(blocked)

                    if _record_block(blocker, blocked, "unknown", "reject"):
                        blockdict.append(
                            {
                                "blocked": blocked,
                                "reason": None
                            })

                    if "public_comment" in peer:
                        reason = peer["public_comment"]
                        fba.update_block_reason(reason, blocker, blocked, "reject")

                        # Keep the pending bot announcement in sync
                        for entry in blockdict:
                            if entry["blocked"] == blocked:
                                entry["reason"] = reason

                fba.conn.commit()
        except Exception as e:
            print("error:", e, blocker, software)
    else:
        print("WARNING: Unknown software:", software)

    if fba.config["bot_enabled"] and blockdict:
        # NOTE(review): this was a bare send_bot_post(), which is neither
        # defined nor imported in this file (NameError). Assumed to live in
        # fba like every other helper used here - confirm.
        fba.send_bot_post(blocker, blockdict)

fba.conn.close()