]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
1 import reqto
2 import time
3 import bs4
4 import fba
5 import itertools
6 import re
7
# Load every known instance whose software is one of the five this script
# handles below; rows are visited from the highest rowid downwards.
fba.c.execute(
    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial') ORDER BY rowid DESC"
)

# The shared cursor fba.c is reused for lookups inside the loop, so the
# result set is fetched completely before iterating.
instance_rows = fba.c.fetchall()

for blocker, software in instance_rows:
    # Blocks newly recorded for this blocker that qualify for the bot post
    # (filled by the software-specific branches below).
    blockdict = []
    blocker = fba.tidyup(blocker)

    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue
21
22     if software == "pleroma":
23         print("INFO: blocker:", blocker)
24         try:
25             # Blocks
26             federation = reqto.get(
27                 f"https://{blocker}/nodeinfo/2.1.json", headers=fba.headers, timeout=5
28             ).json()["metadata"]["federation"]
29
30             if "enabled" in federation:
31                 # NOISY-DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker)
32                 continue
33
34             if "mrf_simple" in federation:
35                 for block_level, blocks in (
36                     {**federation["mrf_simple"],
37                     **{"quarantined_instances": federation["quarantined_instances"]}}
38                 ).items():
39                     # NOISY-DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks))
40                     block_level = fba.tidyup(block_level)
41                     # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level)
42
43                     if block_level == "":
44                         print("WARNING: block_level is now empty!")
45                         continue
46
47                     for blocked in blocks:
48                         # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
49                         blocked = fba.tidyup(blocked)
50                         # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
51
52                         if blocked == "":
53                             print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
54                             continue
55
56                         if blocked.count("*") > 1:
57                             # -ACK!-oma also started obscuring domains without hash
58                             fba.c.execute(
59                                 "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
60                             )
61                             searchres = fba.c.fetchone()
62                             # NOISY-DEBUG: print("DEBUG: searchres[]:", type(searchres))
63                             if searchres != None:
64                                 blocked = searchres[0]
65                                 # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked)
66
67                         # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
68                         fba.c.execute(
69                             "SELECT domain FROM instances WHERE domain = ?", (blocked,)
70                         )
71
72                         if fba.c.fetchone() == None:
73                             # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked)
74                             fba.add_instance(blocked)
75
76                         timestamp = int(time.time())
77                         fba.c.execute(
78                             "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
79                             (
80                                blocker,
81                                blocked,
82                                block_level
83                            ),
84                         )
85
86                         if fba.c.fetchone() == None:
87                             # NOISY-DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
88                             fba.block_instance(blocker, blocked, "unknown", block_level, timestamp, timestamp)
89
90                             if block_level == "reject":
91                                 # NOISY-DEBUG: print("DEBUG: Adding to blockdict:", blocked)
92                                 blockdict.append(
93                                     {
94                                         "blocked": blocked,
95                                         "reason": None
96                                     })
97                         else:
98                             # NOISY-DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level)
99                             fba.update_last_seen(timestamp, blocker, blocked, block_level)
100
101             fba.conn.commit()
102
103             # Reasons
104             if "mrf_simple_info" in federation:
105                 # NOISY-DEBUG: print("DEBUG: Found mrf_simple_info:", blocker)
106                 for block_level, info in (
107                     {**federation["mrf_simple_info"],
108                     **(federation["quarantined_instances_info"]
109                     if "quarantined_instances_info" in federation
110                     else {})}
111                 ).items():
112                     # NOISY-DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
113                     block_level = fba.tidyup(block_level)
114                     # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level)
115
116                     if block_level == "":
117                         print("WARNING: block_level is now empty!")
118                         continue
119
120                     for blocked, reason in info.items():
121                         # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
122                         blocked = fba.tidyup(blocked)
123                         # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
124
125                         if blocked == "":
126                             print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
127                             continue
128                         elif blocked.count("*") > 1:
129                             # same domain guess as above, but for reasons field
130                             fba.c.execute(
131                                 "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
132                             )
133                             searchres = fba.c.fetchone()
134
135                             if searchres != None:
136                                 blocked = searchres[0]
137
138                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
139                         fba.update_block_reason(reason["reason"], blocker, blocked, block_level)
140
141                         for entry in blockdict:
142                             if entry["blocked"] == blocked:
143                                 # NOISY-DEBUG: print("DEBUG: Updating entry reason:", blocked)
144                                 entry["reason"] = reason["reason"]
145
146             fba.conn.commit()
147         except Exception as e:
148             print("error:", e, blocker, software)
149     elif software == "mastodon":
150         print("INFO: blocker:", blocker)
151         try:
152             # json endpoint for newer mastodongs
153             try:
154                 json = {
155                     "reject": [],
156                     "media_removal": [],
157                     "followers_only": [],
158                     "report_removal": []
159                 }
160
161                 # handling CSRF, I've saw at least one server requiring it to access the endpoint
162                 # NOISY-DEBUG: print("DEBUG: Fetching meta:", blocker)
163                 meta = bs4.BeautifulSoup(
164                     reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=5).text,
165                     "html.parser",
166                 )
167                 try:
168                     csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
169                     # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf)
170                     reqheaders = {**fba.headers, **{"x-csrf-token": csrf}}
171                 except:
172                     # NOISY-DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker)
173                     reqheaders = fba.headers
174
175                 # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker)
176                 blocks = reqto.get(
177                     f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
178                 ).json()
179
180                 # NOISY-DEBUG: print("DEBUG: blocks():", len(blocks))
181                 for block in blocks:
182                     entry = {
183                         'domain': block['domain'],
184                         'hash': block['digest'],
185                         'reason': block['comment']
186                     }
187
188                     # NOISY-DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
189                     if block['severity'] == 'suspend':
190                         json['reject'].append(entry)
191                     elif block['severity'] == 'silence':
192                         json['followers_only'].append(entry)
193                     elif block['severity'] == 'reject_media':
194                         json['media_removal'].append(entry)
195                     elif block['severity'] == 'reject_reports':
196                         json['report_removal'].append(entry)
197                     else:
198                         print("WARNING: Unknown severity:", block['severity'], block['domain'])
199             except:
200                 # NOISY-DEBUG: print("DEBUG: Failed, Trying mastodon-specific fetches:", blocker)
201                 json = fba.get_mastodon_blocks(blocker)
202
203             # NOISY-DEBUG: print("DEBUG: json.items():", blocker, len(json.items()))
204             for block_level, blocks in json.items():
205                 # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
206                 block_level = fba.tidyup(block_level)
207                 # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
208                 if block_level == "":
209                     print("WARNING: block_level is empty, blocker:", blocker)
210                     continue
211
212                 for instance in blocks:
213                     blocked, blocked_hash, reason = instance.values()
214                     # NOISY-DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)
215                     blocked = fba.tidyup(blocked)
216                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
217
218                     if blocked == "":
219                         print("WARNING: blocked is empty:", blocker)
220                         continue
221                     elif blocked.count("*") < 1:
222                         # No obsfucation for this instance
223                         fba.c.execute(
224                             "SELECT hash FROM instances WHERE domain = ? LIMIT 1", (blocked,)
225                         )
226
227                         if fba.c.fetchone() == None:
228                             # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked)
229                             fba.add_instance(blocked)
230                     else:
231                         # Doing the hash search for instance names as well to tidy up DB
232                         fba.c.execute(
233                             "SELECT domain FROM instances WHERE hash = ? LIMIT 1", (blocked_hash,)
234                         )
235                         searchres = fba.c.fetchone()
236
237                         if searchres != None:
238                             # NOISY-DEBUG: print("DEBUG: Updating domain: ", searchres[0])
239                             blocked = searchres[0]
240
241                     timestamp = int(time.time())
242                     fba.c.execute(
243                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
244                         (
245                             blocker,
246                             blocked if blocked.count("*") <= 1 else blocked_hash,
247                             block_level
248                         ),
249                     )
250
251                     if fba.c.fetchone() == None:
252                         fba.block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level, timestamp, timestamp)
253
254                         if block_level == "reject":
255                             blockdict.append(
256                                 {
257                                     "blocked": blocked,
258                                     "reason": reason
259                                 })
260                     else:
261                         fba.update_last_seen(timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
262
263                     if reason != '':
264                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
265                         fba.update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
266
267             fba.conn.commit()
268         except Exception as e:
269             print("error:", e, blocker, software)
270     elif software == "friendica" or software == "misskey":
271         print("INFO: blocker:", blocker)
272         try:
273             if software == "friendica":
274                 json = fba.get_friendica_blocks(blocker)
275             elif software == "misskey":
276                 json = fba.get_misskey_blocks(blocker)
277
278             for block_level, blocks in json.items():
279                 # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
280                 block_level = fba.tidyup(block_level)
281                 # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
282                 if block_level == "":
283                     print("WARNING: block_level is empty, blocker:", blocker)
284                     continue
285
286                 for instance in blocks:
287                     blocked, reason = instance.values()
288                     # NOISY-DEBUG: print("DEBUG: BEFORE-blocked:", blocked)
289                     blocked = fba.tidyup(blocked)
290                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
291
292                     if blocked == "":
293                         print("WARNING: blocked is empty:", blocker)
294                         continue
295                     if blocked.count("*") > 0:
296                         # Some friendica servers also obscure domains without hash
297                         fba.c.execute(
298                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
299                         )
300                         searchres = fba.c.fetchone()
301                         if searchres != None:
302                             blocked = searchres[0]
303
304                     if blocked.count("?") > 0:
305                         # Some obscure them with question marks, not sure if that's dependent on version or not
306                         fba.c.execute(
307                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("?", "_"),)
308                         )
309                         searchres = fba.c.fetchone()
310                         if searchres != None:
311                             blocked = searchres[0]
312
313                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
314                     fba.c.execute(
315                         "SELECT domain FROM instances WHERE domain = ?", (blocked,)
316                     )
317
318                     if fba.c.fetchone() == None:
319                         # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked)
320                         fba.add_instance(blocked)
321
322                     timestamp = int(time.time())
323                     fba.c.execute(
324                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?",
325                         (blocker, blocked),
326                     )
327                     if fba.c.fetchone() == None:
328                         fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
329
330                         if block_level == "reject":
331                             blockdict.append(
332                                 {
333                                     "blocked": blocked,
334                                     "reason": reason
335                                 })
336                     else:
337                         fba.update_last_seen(timestamp, blocker, blocked, block_level)
338
339                     if reason != '':
340                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
341                         fba.update_block_reason(reason, blocker, blocked, block_level)
342
343             fba.conn.commit()
344         except Exception as e:
345             print("error:", e, blocker, software)
346     elif software == "gotosocial":
347         print("INFO: blocker:", blocker)
348         try:
349             # Blocks
350             federation = reqto.get(
351                 f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=5
352             ).json()
353
354             if (federation == None):
355                 print("WARNING: No valid response:", blocker);
356             elif "error" in federation:
357                 print("WARNING: API returned error:", federation["error"])
358             else:
359                 for peer in federation:
360                     blocked = peer["domain"].lower()
361                     # NOISY-DEBUG: print("DEBUG: BEFORE-blocked:", blocked)
362                     blocked = fba.tidyup(blocked)
363                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
364
365                     if blocked == "":
366                         print("WARNING: blocked is empty:", blocker)
367                         continue
368                     if blocked.count("*") > 0:
369                         # GTS does not have hashes for obscured domains, so we have to guess it
370                         fba.c.execute(
371                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
372                         )
373                         searchres = fba.c.fetchone()
374
375                         if searchres != None:
376                             blocked = searchres[0]
377
378                     fba.c.execute(
379                         "SELECT domain FROM instances WHERE domain = ?", (blocked,)
380                     )
381
382                     if fba.c.fetchone() == None:
383                         # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked)
384                         fba.add_instance(blocked)
385
386                     fba.c.execute(
387                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
388                         (
389                             blocker,
390                             blocked,
391                             "reject"
392                         ),
393                     )
394                     timestamp = int(time.time())
395
396                     if fba.c.fetchone() == None:
397                         fba.block_instance(blocker, blocked, "unknown", "reject", timestamp, timestamp)
398
399                         blockdict.append(
400                             {
401                                 "blocked": blocked,
402                                 "reason": None
403                             })
404                     else:
405                         fba.update_last_seen(timestamp, blocker, blocked, "reject")
406
407                     if "public_comment" in peer:
408                         reason = peer["public_comment"]
409                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
410                         fba.update_block_reason(reason, blocker, blocked, "reject")
411
412                         for entry in blockdict:
413                             if entry["blocked"] == blocked:
414                                 entry["reason"] = reason
415
416                 fba.conn.commit()
417         except Exception as e:
418             print("error:", e, blocker, software)
419     else:
420         print("WARNING: Unknown software:", software)
421
422     if fba.config["bot_enabled"] and len(blockdict) > 0:
423         send_bot_post(blocker, blockdict)
424
425     blockdict = []
426
427 fba.conn.close()