]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
1 #!/usr/bin/python3
2 # -*- coding: utf-8 -*-
3
4 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
5 # Copyright (C) 2023 Free Software Foundation
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU Affero General Public License as published
9 # by the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 # GNU Affero General Public License for more details.
16 #
17 # You should have received a copy of the GNU Affero General Public License
18 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
19
20 import reqto
21 import time
22 import bs4
23 import itertools
24 import re
25 import fba
26
# Main script: for every known instance whose block list is due for a
# re-check, fetch the list in the way its software exposes it, record new
# blocks (or refresh existing ones) in the database and, when the bot is
# enabled, post the newly found "reject" blocks.


def _request_timeout():
    """Return the (connect, read) timeout tuple taken from fba.config."""
    return (fba.config["connection_timeout"], fba.config["read_timeout"])


def _find_obscured_instance(pattern, wildcard="*"):
    """Guess the registered instance behind an obscured domain.

    Every occurrence of `wildcard` in `pattern` is replaced by the SQL LIKE
    single-character placeholder "_". Returns the matching row with the
    lowest rowid as (domain, origin, nodeinfo_url), or None when no
    instance matches.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [pattern.replace(wildcard, "_")]
    )
    return fba.cursor.fetchone()


def _is_block_recorded(blocker, blocked, block_level):
    """Return True when the blocks table already has this exact block."""
    fba.cursor.execute(
        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
        (
            blocker,
            blocked,
            block_level
        ),
    )
    return fba.cursor.fetchone() is not None


fba.cursor.execute(
    "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]]
)

rows = fba.cursor.fetchall()
print(f"INFO: Checking {len(rows)} entries ...")
for blocker, software, origin, nodeinfo_url in rows:
    # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
    # Newly found "reject" blocks of this blocker, handed to the bot below
    blockdict = []
    blocker = fba.tidyup(blocker)
    # NOISY-DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue
    elif fba.is_blacklisted(blocker):
        print(f"WARNING: blocker='{blocker}' is blacklisted now!")
        continue

    # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}'")
    fba.update_last_blocked(blocker)

    if software == "pleroma":
        print("INFO: blocker:", blocker)
        try:
            # Blocks
            nodeinfo = fba.fetch_nodeinfo(blocker, nodeinfo_url)
            if nodeinfo is None:
                print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
                continue

            print("DEBUG: Updating nodeinfo:", blocker)
            fba.update_last_nodeinfo(blocker)

            federation = nodeinfo["metadata"]["federation"]

            # NOTE(review): this skips the instance whenever the key exists,
            # regardless of its value. If Pleroma always publishes "enabled",
            # no Pleroma block list is ever analyzed - confirm whether the
            # value (federation disabled) was meant to be tested instead.
            if "enabled" in federation:
                # NOISY-DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker)
                continue

            if "mrf_simple" in federation:
                # Treat the quarantine list as one more block level. It may
                # be absent, which must not abort the whole blocker.
                block_lists = {
                    **federation["mrf_simple"],
                    "quarantined_instances": federation.get("quarantined_instances", []),
                }

                for block_level, blocks in block_lists.items():
                    # NOISY-DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks))
                    block_level = fba.tidyup(block_level)

                    if block_level == "":
                        print("WARNING: block_level is now empty!")
                        continue

                    for blocked in blocks:
                        # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                        blocked = fba.tidyup(blocked)
                        # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)

                        if blocked == "":
                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                            continue

                        # Per-entry copy: a value looked up for one obscured
                        # domain must not leak into the following entries.
                        blocked_nodeinfo_url = nodeinfo_url

                        if blocked.count("*") > 1:
                            # -ACK!-oma also started obscuring domains without hash
                            found = _find_obscured_instance(blocked)
                            if found is not None:
                                blocked = found[0]
                                blocked_nodeinfo_url = found[2]
                                # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked)

                        # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        if not fba.is_instance_registered(blocked):
                            fba.add_instance(blocked, blocker, origin, blocked_nodeinfo_url)

                        if _is_block_recorded(blocker, blocked, block_level):
                            # NOISY-DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level)
                            fba.update_last_seen(blocker, blocked, block_level)
                        else:
                            # NOISY-DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
                            fba.block_instance(blocker, blocked, "unknown", block_level)

                            if block_level == "reject":
                                # Reason is filled in by the "Reasons" pass below
                                blockdict.append(
                                    {
                                        "blocked": blocked,
                                        "reason" : None
                                    })

            fba.connection.commit()

            # Reasons
            if "mrf_simple_info" in federation:
                # NOISY-DEBUG: print("DEBUG: Found mrf_simple_info:", blocker)
                reason_lists = {
                    **federation["mrf_simple_info"],
                    **federation.get("quarantined_instances_info", {}),
                }

                for block_level, info in reason_lists.items():
                    # NOISY-DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
                    block_level = fba.tidyup(block_level)

                    if block_level == "":
                        print("WARNING: block_level is now empty!")
                        continue

                    for blocked, reason in info.items():
                        # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                        blocked = fba.tidyup(blocked)
                        # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)

                        if blocked == "":
                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                            continue

                        blocked_origin = origin
                        blocked_nodeinfo_url = nodeinfo_url

                        if blocked.count("*") > 1:
                            # same domain guess as above, but for reasons field
                            found = _find_obscured_instance(blocked)
                            if found is not None:
                                blocked = found[0]
                                blocked_origin = found[1]
                                blocked_nodeinfo_url = found[2]

                        # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        if not fba.is_instance_registered(blocked):
                            fba.add_instance(blocked, blocker, blocked_origin, blocked_nodeinfo_url)

                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
                        fba.update_block_reason(reason["reason"], blocker, blocked, block_level)

                        for entry in blockdict:
                            if entry["blocked"] == blocked:
                                # NOISY-DEBUG: print("DEBUG: Updating entry reason:", blocked)
                                entry["reason"] = reason["reason"]

            fba.connection.commit()
        except Exception as e:
            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
    elif software == "mastodon":
        print("INFO: blocker:", blocker)
        try:
            # JSON endpoint of newer Mastodon versions; any failure falls
            # back to the mastodon-specific fetch below.
            try:
                block_lists = {
                    "reject"        : [],
                    "media_removal" : [],
                    "followers_only": [],
                    "report_removal": []
                }

                # Handling CSRF, at least one server requires it to access the endpoint
                # NOISY-DEBUG: print("DEBUG: Fetching meta:", blocker)
                meta = bs4.BeautifulSoup(
                    reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=_request_timeout()).text,
                    "html.parser",
                )
                try:
                    csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
                    # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf)
                    reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}}
                except Exception:
                    # No usable <meta name="csrf-token"> tag on the page
                    # NOISY-DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker)
                    reqheaders = fba.api_headers

                # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker)
                blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=_request_timeout()).json()

                # NOISY-DEBUG: print("DEBUG: blocks():", len(blocks))
                for block in blocks:
                    entry = {
                        'domain': block['domain'],
                        'hash'  : block['digest'],
                        'reason': block['comment']
                    }

                    # NOISY-DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
                    if block['severity'] == 'suspend':
                        block_lists['reject'].append(entry)
                    elif block['severity'] == 'silence':
                        block_lists['followers_only'].append(entry)
                    elif block['severity'] == 'reject_media':
                        block_lists['media_removal'].append(entry)
                    elif block['severity'] == 'reject_reports':
                        block_lists['report_removal'].append(entry)
                    else:
                        print("WARNING: Unknown severity:", block['severity'], block['domain'])
            except Exception:
                # NOISY-DEBUG: print("DEBUG: Failed, Trying mastodon-specific fetches:", blocker)
                block_lists = fba.get_mastodon_blocks(blocker)

            # NOISY-DEBUG: print("DEBUG: block_lists.items():", blocker, len(block_lists.items()))
            for block_level, blocks in block_lists.items():
                # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
                block_level = fba.tidyup(block_level)
                # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for instance in blocks:
                    # Relies on each entry holding domain, hash and reason in
                    # exactly this insertion order (as the entries built above do).
                    blocked, blocked_hash, reason = instance.values()
                    # NOISY-DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)
                    blocked = fba.tidyup(blocked)
                    # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue

                    # Per-entry copies: values looked up for one obscured
                    # domain must not leak into the following entries.
                    blocked_origin = origin
                    blocked_nodeinfo_url = nodeinfo_url

                    if blocked.count("*") < 1:
                        # No obfuscation for this instance
                        fba.cursor.execute(
                            "SELECT hash FROM instances WHERE domain = ? LIMIT 1", [blocked]
                        )

                        if fba.cursor.fetchone() is None:
                            # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                            fba.add_instance(blocked, blocker, blocked_origin, blocked_nodeinfo_url)
                    else:
                        # Doing the hash search for instance names as well to tidy up DB
                        fba.cursor.execute(
                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                        )
                        found = fba.cursor.fetchone()

                        if found is not None:
                            # NOISY-DEBUG: print("DEBUG: Updating domain: ", found[0])
                            blocked = found[0]
                            blocked_origin = found[1]
                            blocked_nodeinfo_url = found[2]

                        # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        if not fba.is_instance_registered(blocked):
                            fba.add_instance(blocked, blocker, blocked_origin, blocked_nodeinfo_url)

                    # Domains that are still obscured are stored by their hash
                    block_key = blocked if blocked.count("*") <= 1 else blocked_hash

                    if _is_block_recorded(blocker, block_key, block_level):
                        fba.update_last_seen(blocker, block_key, block_level)
                    else:
                        fba.block_instance(blocker, block_key, reason, block_level)

                        if block_level == "reject":
                            blockdict.append(
                                {
                                    "blocked": blocked,
                                    "reason" : reason
                                })

                    if reason != "":
                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
                        fba.update_block_reason(reason, blocker, block_key, block_level)

            fba.connection.commit()
        except Exception as e:
            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
    elif software in ("friendica", "misskey", "bookwyrm", "takahe"):
        print("INFO: blocker:", blocker)
        try:
            # Start empty so the unsupported software types below neither
            # fail on an unset name nor reuse a previous blocker's list.
            block_lists = {}

            if software == "friendica":
                block_lists = fba.get_friendica_blocks(blocker)
            elif software == "misskey":
                block_lists = fba.get_misskey_blocks(blocker)
            elif software == "bookwyrm":
                print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker)
                #block_lists = fba.get_bookwyrm_blocks(blocker)
            elif software == "takahe":
                print("WARNING: takahe is not fully supported for fetching blacklist!", blocker)
                #block_lists = fba.get_takahe_blocks(blocker)

            for block_level, blocks in block_lists.items():
                # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
                block_level = fba.tidyup(block_level)
                # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for instance in blocks:
                    # Relies on each entry holding domain and reason in this order
                    blocked, reason = instance.values()
                    # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                    blocked = fba.tidyup(blocked)
                    # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue

                    blocked_origin = origin
                    blocked_nodeinfo_url = nodeinfo_url

                    if blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without hash
                        found = _find_obscured_instance(blocked)
                        if found is not None:
                            blocked = found[0]
                            blocked_origin = found[1]
                            blocked_nodeinfo_url = found[2]

                    if blocked.count("?") > 0:
                        # Some obscure them with question marks, not sure if that's dependent on version or not
                        found = _find_obscured_instance(blocked, "?")
                        if found is not None:
                            blocked = found[0]
                            blocked_origin = found[1]
                            blocked_nodeinfo_url = found[2]

                    # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
                    if not fba.is_instance_registered(blocked):
                        # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                        fba.add_instance(blocked, blocker, blocked_origin, blocked_nodeinfo_url)

                    # Check per block level, like every other software branch
                    # and like the update calls below do.
                    if _is_block_recorded(blocker, blocked, block_level):
                        fba.update_last_seen(blocker, blocked, block_level)
                    else:
                        fba.block_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append(
                                {
                                    "blocked": blocked,
                                    "reason" : reason
                                })

                    if reason != '':
                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
                        fba.update_block_reason(reason, blocker, blocked, block_level)

            fba.connection.commit()
        except Exception as e:
            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
    elif software == "gotosocial":
        print("INFO: blocker:", blocker)
        try:
            # Blocks
            # NOTE(review): the peers URL path is presumably provided by the
            # fba module like headers/api_headers; the bare name used before
            # is not defined in this script - confirm the attribute exists.
            federation = reqto.get(f"https://{blocker}{fba.get_peers_url}?filter=suspended", headers=fba.api_headers, timeout=_request_timeout()).json()

            if federation is None:
                print("WARNING: No valid response:", blocker)
            elif "error" in federation:
                print("WARNING: API returned error:", federation["error"])
            else:
                # NOISY-DEBUG: print("DEBUG: Checking federation():", len(federation))
                for peer in federation:
                    blocked = peer["domain"].lower()
                    # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                    blocked = fba.tidyup(blocked)
                    # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue

                    blocked_origin = origin
                    blocked_nodeinfo_url = nodeinfo_url

                    if blocked.count("*") > 0:
                        # GTS does not have hashes for obscured domains, so we have to guess it
                        found = _find_obscured_instance(blocked)
                        if found is not None:
                            blocked = found[0]
                            blocked_origin = found[1]
                            blocked_nodeinfo_url = found[2]

                    if not fba.is_instance_registered(blocked):
                        fba.add_instance(blocked, blocker, blocked_origin, blocked_nodeinfo_url)

                    if _is_block_recorded(blocker, blocked, "reject"):
                        fba.update_last_seen(blocker, blocked, "reject")
                    else:
                        # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point")
                        fba.block_instance(blocker, blocked, "unknown", "reject")

                        blockdict.append(
                            {
                                "blocked": blocked,
                                "reason" : None
                            })

                    if "public_comment" in peer:
                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"])
                        fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject")

                        for entry in blockdict:
                            if entry["blocked"] == blocked:
                                # NOISY-DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'")
                                entry["reason"] = peer["public_comment"]

                fba.connection.commit()
        except Exception as e:
            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
    else:
        print("WARNING: Unknown software:", blocker, software)

    if fba.config["bot_enabled"] and len(blockdict) > 0:
        # NOTE(review): presumably provided by the fba module; the bare name
        # used before is not defined in this script - confirm.
        fba.send_bot_post(blocker, blockdict)

fba.connection.close()