]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Let's do this proper. Here you go, FSF!
[fba.git] / fetch_blocks.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
import itertools
import re
import sys
import time

import bs4
import reqto

import fba
23
# Select every instance running one of the listed software types whose
# block list has never been fetched, or was last fetched more than
# `recheck_block` seconds ago.
recheck_before = time.time() - fba.config["recheck_block"]
fba.cursor.execute(
    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC",
    [recheck_before],
)

rows = fba.cursor.fetchall()
print(f"INFO: Checking {len(rows)} entries ...")
for blocker, software in rows:
    # Blocks recorded for the first time during this pass over `blocker`;
    # handed to the bot at the end of the iteration.
    blockdict = []

    # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software:", blocker, software)
    blocker = fba.tidyup(blocker)
    # NOISY-DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

    # Guard clauses: skip rows that cannot or must not be processed.
    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue

    if fba.is_blacklisted(blocker):
        print(f"WARNING: blocker='{blocker}' is blacklisted now!")
        continue

    # The timestamp is updated before any fetch is attempted, so a blocker
    # whose fetch fails below is still marked as checked.
    # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}'")
    fba.update_last_blocked(blocker)
45
46     if software == "pleroma":
47         print("INFO: blocker:", blocker)
48         try:
49             # Blocks
50             json = fba.fetch_nodeinfo(blocker)
51             if json is None:
52                 print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
53                 continue
54
55             print("DEBUG: Updating nodeinfo:", blocker)
56             fba.update_last_nodeinfo(blocker)
57
58             federation = json["metadata"]["federation"]
59
60             if "enabled" in federation:
61                 # NOISY-DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker)
62                 continue
63
64             if "mrf_simple" in federation:
65                 for block_level, blocks in (
66                     {**federation["mrf_simple"],
67                     **{"quarantined_instances": federation["quarantined_instances"]}}
68                 ).items():
69                     # NOISY-DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks))
70                     block_level = fba.tidyup(block_level)
71                     # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level)
72
73                     if block_level == "":
74                         print("WARNING: block_level is now empty!")
75                         continue
76
77                     for blocked in blocks:
78                         # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
79                         blocked = fba.tidyup(blocked)
80                         # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
81
82                         if blocked == "":
83                             print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
84                             continue
85
86                         if blocked.count("*") > 1:
87                             # -ACK!-oma also started obscuring domains without hash
88                             fba.cursor.execute(
89                                 "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
90                             )
91                             searchres = fba.cursor.fetchone()
92                             # NOISY-DEBUG: print("DEBUG: searchres[]:", type(searchres))
93                             if searchres != None:
94                                 blocked = searchres[0]
95                                 # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked)
96
97                         # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
98                         if not fba.is_instance_registered(blocked):
99                             # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker)
100                             fba.add_instance(blocked, blocker, argv[0])
101
102                         fba.cursor.execute(
103                             "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
104                             (
105                                blocker,
106                                blocked,
107                                block_level
108                            ),
109                         )
110
111                         if fba.cursor.fetchone() == None:
112                             # NOISY-DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
113                             fba.block_instance(blocker, blocked, "unknown", block_level)
114
115                             if block_level == "reject":
116                                 # NOISY-DEBUG: print("DEBUG: Adding to blockdict:", blocked)
117                                 blockdict.append(
118                                     {
119                                         "blocked": blocked,
120                                         "reason": None
121                                     })
122                         else:
123                             # NOISY-DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level)
124                             fba.update_last_seen(blocker, blocked, block_level)
125
126             fba.connection.commit()
127
128             # Reasons
129             if "mrf_simple_info" in federation:
130                 # NOISY-DEBUG: print("DEBUG: Found mrf_simple_info:", blocker)
131                 for block_level, info in (
132                     {**federation["mrf_simple_info"],
133                     **(federation["quarantined_instances_info"]
134                     if "quarantined_instances_info" in federation
135                     else {})}
136                 ).items():
137                     # NOISY-DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
138                     block_level = fba.tidyup(block_level)
139                     # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level)
140
141                     if block_level == "":
142                         print("WARNING: block_level is now empty!")
143                         continue
144
145                     for blocked, reason in info.items():
146                         # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
147                         blocked = fba.tidyup(blocked)
148                         # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
149
150                         if blocked == "":
151                             print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
152                             continue
153                         elif blocked.count("*") > 1:
154                             # same domain guess as above, but for reasons field
155                             fba.cursor.execute(
156                                 "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
157                             )
158                             searchres = fba.cursor.fetchone()
159
160                             if searchres != None:
161                                 blocked = searchres[0]
162
163                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
164                         fba.update_block_reason(reason["reason"], blocker, blocked, block_level)
165
166                         for entry in blockdict:
167                             if entry["blocked"] == blocked:
168                                 # NOISY-DEBUG: print("DEBUG: Updating entry reason:", blocked)
169                                 entry["reason"] = reason["reason"]
170
171             fba.connection.commit()
172         except Exception as e:
173             print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
174     elif software == "mastodon":
175         print("INFO: blocker:", blocker)
176         try:
177             # json endpoint for newer mastodongs
178             try:
179                 json = {
180                     "reject": [],
181                     "media_removal": [],
182                     "followers_only": [],
183                     "report_removal": []
184                 }
185
186                 # handling CSRF, I've saw at least one server requiring it to access the endpoint
187                 # NOISY-DEBUG: print("DEBUG: Fetching meta:", blocker)
188                 meta = bs4.BeautifulSoup(
189                     reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).text,
190                     "html.parser",
191                 )
192                 try:
193                     csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
194                     # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf)
195                     reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}}
196                 except:
197                     # NOISY-DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker)
198                     reqheaders = fba.api_headers
199
200                 # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker)
201                 blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json()
202
203                 # NOISY-DEBUG: print("DEBUG: blocks():", len(blocks))
204                 for block in blocks:
205                     entry = {
206                         'domain': block['domain'],
207                         'hash': block['digest'],
208                         'reason': block['comment']
209                     }
210
211                     # NOISY-DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
212                     if block['severity'] == 'suspend':
213                         json['reject'].append(entry)
214                     elif block['severity'] == 'silence':
215                         json['followers_only'].append(entry)
216                     elif block['severity'] == 'reject_media':
217                         json['media_removal'].append(entry)
218                     elif block['severity'] == 'reject_reports':
219                         json['report_removal'].append(entry)
220                     else:
221                         print("WARNING: Unknown severity:", block['severity'], block['domain'])
222             except:
223                 # NOISY-DEBUG: print("DEBUG: Failed, Trying mastodon-specific fetches:", blocker)
224                 json = fba.get_mastodon_blocks(blocker)
225
226             # NOISY-DEBUG: print("DEBUG: json.items():", blocker, len(json.items()))
227             for block_level, blocks in json.items():
228                 # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
229                 block_level = fba.tidyup(block_level)
230                 # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
231                 if block_level == "":
232                     print("WARNING: block_level is empty, blocker:", blocker)
233                     continue
234
235                 for instance in blocks:
236                     blocked, blocked_hash, reason = instance.values()
237                     # NOISY-DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)
238                     blocked = fba.tidyup(blocked)
239                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
240
241                     if blocked == "":
242                         print("WARNING: blocked is empty:", blocker)
243                         continue
244                     elif blocked.count("*") < 1:
245                         # No obsfucation for this instance
246                         fba.cursor.execute(
247                             "SELECT hash FROM instances WHERE domain = ? LIMIT 1", [blocked]
248                         )
249
250                         if fba.cursor.fetchone() == None:
251                             # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
252                             fba.add_instance(blocked, blocker, argv[0])
253                     else:
254                         # Doing the hash search for instance names as well to tidy up DB
255                         fba.cursor.execute(
256                             "SELECT domain FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
257                         )
258                         searchres = fba.cursor.fetchone()
259
260                         if searchres != None:
261                             # NOISY-DEBUG: print("DEBUG: Updating domain: ", searchres[0])
262                             blocked = searchres[0]
263
264                     fba.cursor.execute(
265                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
266                         (
267                             blocker,
268                             blocked if blocked.count("*") <= 1 else blocked_hash,
269                             block_level
270                         ),
271                     )
272
273                     if fba.cursor.fetchone() == None:
274                         fba.block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level)
275
276                         if block_level == "reject":
277                             blockdict.append(
278                                 {
279                                     "blocked": blocked,
280                                     "reason": reason
281                                 })
282                     else:
283                         fba.update_last_seen(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
284
285                     if reason != "":
286                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
287                         fba.update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
288
289             fba.connection.commit()
290         except Exception as e:
291             print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
292     elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe":
293         print("INFO: blocker:", blocker)
294         try:
295             if software == "friendica":
296                 json = fba.get_friendica_blocks(blocker)
297             elif software == "misskey":
298                 json = fba.get_misskey_blocks(blocker)
299             elif software == "bookwyrm":
300                 print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker)
301                 #json = fba.get_bookwyrm_blocks(blocker)
302             elif software == "takahe":
303                 print("WARNING: takahe is not fully supported for fetching blacklist!", blocker)
304                 #json = fba.get_takahe_blocks(blocker)
305
306             for block_level, blocks in json.items():
307                 # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
308                 block_level = fba.tidyup(block_level)
309                 # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level)
310                 if block_level == "":
311                     print("WARNING: block_level is empty, blocker:", blocker)
312                     continue
313
314                 for instance in blocks:
315                     blocked, reason = instance.values()
316                     # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
317                     blocked = fba.tidyup(blocked)
318                     # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
319
320                     if blocked == "":
321                         print("WARNING: blocked is empty:", blocker)
322                         continue
323                     if blocked.count("*") > 0:
324                         # Some friendica servers also obscure domains without hash
325                         fba.cursor.execute(
326                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
327                         )
328                         searchres = fba.cursor.fetchone()
329                         if searchres != None:
330                             blocked = searchres[0]
331
332                     if blocked.count("?") > 0:
333                         # Some obscure them with question marks, not sure if that's dependent on version or not
334                         fba.cursor.execute(
335                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
336                         )
337                         searchres = fba.cursor.fetchone()
338                         if searchres != None:
339                             blocked = searchres[0]
340
341                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
342                     if not fba.is_instance_registered(blocked):
343                         # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
344                         fba.add_instance(blocked, blocker)
345
346                     fba.cursor.execute(
347                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?",
348                         (blocker, blocked),
349                     )
350
351                     if fba.cursor.fetchone() == None:
352                         fba.block_instance(blocker, blocked, reason, block_level)
353
354                         if block_level == "reject":
355                             blockdict.append(
356                                 {
357                                     "blocked": blocked,
358                                     "reason": reason
359                                 })
360                     else:
361                         fba.update_last_seen(blocker, blocked, block_level)
362
363                     if reason != '':
364                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
365                         fba.update_block_reason(reason, blocker, blocked, block_level)
366
367             fba.connection.commit()
368         except Exception as e:
369             print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
370     elif software == "gotosocial":
371         print("INFO: blocker:", blocker)
372         try:
373             # Blocks
374             federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.api_headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json()
375
376             if (federation == None):
377                 print("WARNING: No valid response:", blocker);
378             elif "error" in federation:
379                 print("WARNING: API returned error:", federation["error"])
380             else:
381                 # NOISY-DEBUG: print("DEBUG: Checking fenderation():", len(federation))
382                 for peer in federation:
383                     blocked = peer["domain"].lower()
384                     # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
385                     blocked = fba.tidyup(blocked)
386                     # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
387
388                     if blocked == "":
389                         print("WARNING: blocked is empty:", blocker)
390                         continue
391                     elif blocked.count("*") > 0:
392                         # GTS does not have hashes for obscured domains, so we have to guess it
393                         fba.cursor.execute(
394                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
395                         )
396                         searchres = fba.cursor.fetchone()
397
398                         if searchres != None:
399                             blocked = searchres[0]
400
401                     if not fba.is_instance_registered(blocked):
402                         # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker)
403                         fba.add_instance(blocked, blocker)
404
405                     fba.cursor.execute(
406                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
407                         (
408                             blocker,
409                             blocked,
410                             "reject"
411                         ),
412                     )
413
414                     if fba.cursor.fetchone() == None:
415                         # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point")
416                         fba.block_instance(blocker, blocked, "unknown", "reject")
417
418                         blockdict.append(
419                             {
420                                 "blocked": blocked,
421                                 "reason": None
422                             })
423                     else:
424                         fba.update_last_seen(blocker, blocked, "reject")
425
426                     if "public_comment" in peer:
427                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"])
428                         fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject")
429
430                         for entry in blockdict:
431                             if entry["blocked"] == blocked:
432                                 # NOISY-DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'")
433                                 entry["reason"] = peer["public_comment"]
434
435                 fba.connection.commit()
436         except Exception as e:
437             print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
438     else:
439         print("WARNING: Unknown software:", blocker, software)
440
441     if fba.config["bot_enabled"] and len(blockdict) > 0:
442         send_bot_post(blocker, blockdict)
443
444     blockdict = []
445
446 fba.connection.close()