# Source: git.mxchange.org Git - fba.git / fetch_blocks.py (commit: "Continued:")
1 import reqto
2 import time
3 import bs4
4 import fba
5 import itertools
6 import re
7
# Fetch the domain block lists published by every known pleroma, mastodon,
# friendica, misskey and gotosocial instance and mirror them into the
# "blocks" table. Newly seen "reject" blocks are collected per blocker and
# handed to the bot when fba.config["bot_enabled"] is set.

# Obscured names are matched by turning every obscuring character into the
# SQL LIKE single-character wildcard "_"; the oldest matching row wins.
_OBSCURED_LOOKUP_SQL = (
    "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1"
)

# Mastodon API severity -> block level name used in the "blocks" table.
_MASTODON_SEVERITY_LEVELS = {
    "suspend": "reject",
    "silence": "followers_only",
    "reject_media": "media_removal",
    "reject_reports": "report_removal",
}


def _resolve_obscured(domain, wildcard="*"):
    """Return the first known instance domain matching an obscured name.

    Every occurrence of `wildcard` in `domain` is replaced by "_" and used as
    a LIKE pattern against the instances table. When nothing matches, `domain`
    is returned unchanged.
    """
    fba.c.execute(_OBSCURED_LOOKUP_SQL, (domain.replace(wildcard, "_"),))
    row = fba.c.fetchone()
    return domain if row is None else row[0]


def _ensure_instance(domain):
    """Call fba.add_instance(domain) unless the instances table already has it."""
    fba.c.execute("SELECT domain FROM instances WHERE domain = ?", (domain,))
    if fba.c.fetchone() is None:
        fba.add_instance(domain)


def _record_block(blocker, blocked, reason, block_level):
    """Insert a block row or refresh its last_seen timestamp.

    Looks for an existing row with the same blocker, blocked and block_level.
    If there is none, fba.block_instance() is called with the current time as
    both timestamps; otherwise fba.update_last_seen() is called.

    Returns True when a new block was inserted, False when an existing one was
    refreshed.
    """
    timestamp = int(time.time())
    fba.c.execute(
        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
        (blocker, blocked, block_level),
    )
    if fba.c.fetchone() is None:
        fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
        return True

    fba.update_last_seen(timestamp, blocker, blocked, block_level)
    return False


def _fetch_pleroma(blocker, blockdict):
    """Store the MRF block lists and reasons a pleroma instance publishes.

    Reads metadata.federation from the instance's nodeinfo 2.1 document. New
    "reject" blocks are appended to `blockdict`; their reason is filled in
    afterwards from mrf_simple_info when available.
    """
    federation = reqto.get(
        f"https://{blocker}/nodeinfo/2.1.json", headers=fba.headers, timeout=5
    ).json()["metadata"]["federation"]

    # Blocks
    if "mrf_simple" in federation:
        levels = {
            **federation["mrf_simple"],
            # Not every server publishes quarantined_instances; a plain
            # subscript raised KeyError and discarded the whole block list.
            "quarantined_instances": federation.get("quarantined_instances", []),
        }

        for block_level, blocks in levels.items():
            block_level = fba.tidyup(block_level)
            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue

            for blocked in blocks:
                blocked = fba.tidyup(blocked)
                if blocked == "":
                    print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                    continue

                if blocked.count("*") > 1:
                    # Pleroma also obscures domains without providing a hash
                    blocked = _resolve_obscured(blocked)

                _ensure_instance(blocked)

                is_new = _record_block(blocker, blocked, "unknown", block_level)
                if is_new and block_level == "reject":
                    blockdict.append({"blocked": blocked, "reason": None})

    fba.conn.commit()

    # Reasons
    if "mrf_simple_info" in federation:
        infos = {
            **federation["mrf_simple_info"],
            **federation.get("quarantined_instances_info", {}),
        }

        for block_level, info in infos.items():
            block_level = fba.tidyup(block_level)
            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue

            for blocked, reason in info.items():
                blocked = fba.tidyup(blocked)
                if blocked == "":
                    print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                    continue

                if blocked.count("*") > 1:
                    # Same domain guess as for the block list itself
                    blocked = _resolve_obscured(blocked)

                fba.update_block_reason(reason["reason"], blocker, blocked, block_level)

                for entry in blockdict:
                    if entry["blocked"] == blocked:
                        entry["reason"] = reason["reason"]

    fba.conn.commit()


def _fetch_mastodon_api_blocks(blocker):
    """Query /api/v1/instance/domain_blocks and group the entries by level.

    Returns a dict mapping block level to a list of
    {'domain', 'hash', 'reason'} entries. Any failure (network, JSON, missing
    keys) propagates to the caller, which falls back to another fetch method.
    """
    levels = {
        "reject": [],
        "media_removal": [],
        "followers_only": [],
        "report_removal": [],
    }

    # At least one server requires a CSRF token to access the endpoint, so
    # try to pick it up from the /about page first.
    meta = bs4.BeautifulSoup(
        reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=5).text,
        "html.parser",
    )
    try:
        csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
        reqheaders = {**fba.headers, "x-csrf-token": csrf}
    except Exception:
        # No usable token on the page: best effort with the normal headers
        reqheaders = fba.headers

    blocks = reqto.get(
        f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
    ).json()

    for block in blocks:
        entry = {
            'domain': block['domain'],
            'hash': block['digest'],
            'reason': block['comment']
        }

        level = _MASTODON_SEVERITY_LEVELS.get(block['severity'])
        if level is None:
            print("WARNING: Unknown severity:", block['severity'], block['domain'])
        else:
            levels[level].append(entry)

    return levels


def _fetch_mastodon(blocker, blockdict):
    """Store the block list of a mastodon instance.

    Uses the JSON endpoint of newer versions and falls back to
    fba.get_mastodon_blocks() when that fails for any reason. Domains
    containing more than one "*" are stored under their hash instead of the
    obscured name. New "reject" blocks are appended to `blockdict`.
    """
    try:
        levels = _fetch_mastodon_api_blocks(blocker)
    except Exception:
        levels = fba.get_mastodon_blocks(blocker)

    for block_level, blocks in levels.items():
        block_level = fba.tidyup(block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        for instance in blocks:
            # Relies on each entry holding exactly domain, hash, reason in that order
            blocked, blocked_hash, reason = instance.values()
            blocked = fba.tidyup(blocked)

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue
            elif blocked.count("*") < 1:
                # No obfuscation for this instance
                _ensure_instance(blocked)
            else:
                # Look the real name up by hash to keep the DB tidy
                fba.c.execute(
                    "SELECT domain FROM instances WHERE hash = ? LIMIT 1", (blocked_hash,)
                )
                row = fba.c.fetchone()
                if row is not None:
                    blocked = row[0]

            # Still-obscured names are recorded by their hash
            stored = blocked if blocked.count("*") <= 1 else blocked_hash

            is_new = _record_block(blocker, stored, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append({"blocked": blocked, "reason": reason})

            if reason != '':
                fba.update_block_reason(reason, blocker, stored, block_level)

    fba.conn.commit()


def _store_listed_blocks(blocker, levels, blockdict):
    """Store a {block_level: [entry, ...]} mapping of plain block entries.

    Each entry must hold exactly two values, the blocked domain and the
    reason, in that order. Domains obscured with "*" or "?" are resolved
    against the known instances. New "reject" blocks are appended to
    `blockdict`.
    """
    for block_level, blocks in levels.items():
        block_level = fba.tidyup(block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        for instance in blocks:
            blocked, reason = instance.values()
            blocked = fba.tidyup(blocked)

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue

            if "*" in blocked:
                # Some friendica servers also obscure domains without hash
                blocked = _resolve_obscured(blocked, "*")

            if "?" in blocked:
                # Some obscure them with question marks instead
                blocked = _resolve_obscured(blocked, "?")

            _ensure_instance(blocked)

            # The lookup includes block_level like every other software
            # branch; without it a domain listed under a second level was
            # never inserted.
            is_new = _record_block(blocker, blocked, reason, block_level)
            if is_new and block_level == "reject":
                blockdict.append({"blocked": blocked, "reason": reason})

            if reason != '':
                fba.update_block_reason(reason, blocker, blocked, block_level)

    fba.conn.commit()


def _fetch_friendica(blocker, blockdict):
    """Store the blocks returned by fba.get_friendica_blocks()."""
    _store_listed_blocks(blocker, fba.get_friendica_blocks(blocker), blockdict)


def _fetch_misskey(blocker, blockdict):
    """Store the blocks returned by fba.get_misskey_blocks()."""
    _store_listed_blocks(blocker, fba.get_misskey_blocks(blocker), blockdict)


def _fetch_gotosocial(blocker, blockdict):
    """Store the suspended peers of a gotosocial instance as "reject" blocks.

    Every newly recorded block is appended to `blockdict`; the reason is taken
    from the peer's "public_comment" when present.
    """
    federation = reqto.get(
        f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=5
    ).json()

    if federation is None:
        print("WARNING: No valid response:", blocker)
        return

    if "error" in federation:
        print("WARNING: API returned error:", federation["error"])
        return

    for peer in federation:
        blocked = fba.tidyup(peer["domain"].lower())

        if blocked == "":
            print("WARNING: blocked is empty:", blocker)
            continue

        if "*" in blocked:
            # GTS does not have hashes for obscured domains, so we have to guess it
            blocked = _resolve_obscured(blocked)

        _ensure_instance(blocked)

        if _record_block(blocker, blocked, "unknown", "reject"):
            blockdict.append({"blocked": blocked, "reason": None})

        if "public_comment" in peer:
            reason = peer["public_comment"]
            fba.update_block_reason(reason, blocker, blocked, "reject")

            for entry in blockdict:
                if entry["blocked"] == blocked:
                    entry["reason"] = reason

    fba.conn.commit()


# Software name (as stored in the instances table) -> fetch routine
_FETCHERS = {
    "pleroma": _fetch_pleroma,
    "mastodon": _fetch_mastodon,
    "friendica": _fetch_friendica,
    "misskey": _fetch_misskey,
    "gotosocial": _fetch_gotosocial,
}

fba.c.execute(
    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
)

for blocker, software in fba.c.fetchall():
    # New "reject" blocks found for this blocker, reported to the bot below
    blockdict = []
    blocker = fba.tidyup(blocker)

    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue

    fetcher = _FETCHERS.get(software)
    if fetcher is None:
        print("WARNING: Unknown software:", software)
    else:
        print("INFO: blocker:", blocker)
        try:
            fetcher(blocker, blockdict)
        except Exception as e:
            # One unreachable or misbehaving instance must not stop the run;
            # blocks recorded before the failure are still reported below.
            print("error:", e, blocker, software)

    if fba.config["bot_enabled"] and blockdict:
        # NOTE(review): send_bot_post is not defined or imported in this
        # script, so the bare call raised NameError. It is assumed to live in
        # the fba module next to the other helpers - confirm.
        fba.send_bot_post(blocker, blockdict)

fba.conn.close()