]> git.mxchange.org Git - fba.git/blob - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import reqto
18 import time
19 import bs4
20 import itertools
21 import re
22 import fba
23
# Pick every instance that runs a supported software and whose block list was
# either never fetched or was last fetched before the recheck cutoff; rows
# with the highest rowid come first.
recheck_before = time.time() - fba.config["recheck_block"]
query = "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC"
fba.cursor.execute(query, [recheck_before])

# Materialise the result set up front; the cursor is reused for other
# queries while the rows are being processed.
rows = fba.cursor.fetchall()
print(f"INFO: Checking {len(rows)} entries ...")
# Walk every selected instance ("blocker"), fetch its published block list
# with the method matching its software, and record each block through the
# fba helpers. Every software branch is wrapped in its own try/except so that
# one failing instance only logs an error and does not abort the whole run.
for blocker, software, origin, nodeinfo_url in rows:
    # Newly recorded "reject" blocks of this blocker, handed to the bot below.
    blockdict = []
    blocker = fba.tidyup(blocker)

    if blocker == "":
        print("WARNING: blocker is now empty!")
        continue
    elif fba.is_blacklisted(blocker):
        print(f"WARNING: blocker='{blocker}' is blacklisted now!")
        continue

    # Stamped before fetching, so a failing instance is not retried on every
    # run but only after the recheck interval has passed again.
    fba.update_last_blocked(blocker)

    if software == "pleroma":
        print("INFO: blocker:", blocker)
        try:
            # Blocks
            nodeinfo = fba.fetch_nodeinfo(blocker, nodeinfo_url)
            if nodeinfo is None:
                print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
                continue

            print("DEBUG: Updating nodeinfo:", blocker)
            fba.update_last_nodeinfo(blocker)

            federation = nodeinfo["metadata"]["federation"]

            # NOTE(review): this skips every instance whose federation object
            # contains an "enabled" key, whatever its value - confirm that
            # this is really the intended "no block list" marker.
            if "enabled" in federation:
                continue

            if "mrf_simple" in federation:
                # "quarantined_instances" is handled like one more block
                # level; it is optional, so a missing key means "no entries"
                # instead of failing the whole instance with a KeyError.
                block_lists = {
                    **federation["mrf_simple"],
                    "quarantined_instances": federation.get("quarantined_instances", []),
                }

                for block_level, blocks in block_lists.items():
                    block_level = fba.tidyup(block_level)

                    if block_level == "":
                        print("WARNING: block_level is now empty!")
                        continue

                    for blocked in blocks:
                        blocked = fba.tidyup(blocked)
                        # Per-entry copy: a wildcard match below must not
                        # leak its nodeinfo URL into later entries.
                        blocked_nodeinfo_url = nodeinfo_url

                        if blocked == "":
                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                            continue

                        if blocked.count("*") > 1:
                            # -ACK!-oma also started obscuring domains without hash,
                            # so guess the domain: every "*" matches one character.
                            fba.cursor.execute(
                                "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                            )
                            searchres = fba.cursor.fetchone()
                            if searchres is not None:
                                blocked = searchres[0]
                                blocked_nodeinfo_url = searchres[1]

                        if not fba.is_instance_registered(blocked):
                            fba.add_instance(blocked, blocker, origin, blocked_nodeinfo_url)

                        fba.cursor.execute(
                            "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
                            (blocker, blocked, block_level),
                        )

                        if fba.cursor.fetchone() is None:
                            # The reason is filled in by the "Reasons" pass below.
                            fba.block_instance(blocker, blocked, "unknown", block_level)

                            if block_level == "reject":
                                blockdict.append({"blocked": blocked, "reason": None})
                        else:
                            fba.update_last_seen(blocker, blocked, block_level)

            fba.connection.commit()

            # Reasons
            if "mrf_simple_info" in federation:
                reason_lists = {
                    **federation["mrf_simple_info"],
                    **federation.get("quarantined_instances_info", {}),
                }

                for block_level, info in reason_lists.items():
                    block_level = fba.tidyup(block_level)

                    if block_level == "":
                        print("WARNING: block_level is now empty!")
                        continue

                    for blocked, reason in info.items():
                        blocked = fba.tidyup(blocked)

                        if blocked == "":
                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                            continue
                        elif blocked.count("*") > 1:
                            # same domain guess as above, but for reasons field
                            fba.cursor.execute(
                                "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                            )
                            searchres = fba.cursor.fetchone()

                            if searchres is not None:
                                blocked = searchres[0]

                        fba.update_block_reason(reason["reason"], blocker, blocked, block_level)

                        # Also attach the reason to the pending bot entry, if any.
                        for entry in blockdict:
                            if entry["blocked"] == blocked:
                                entry["reason"] = reason["reason"]

            fba.connection.commit()
        except Exception as e:
            print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
    elif software == "mastodon":
        print("INFO: blocker:", blocker)
        try:
            # json endpoint for newer mastodongs
            try:
                block_lists = {
                    "reject": [],
                    "media_removal": [],
                    "followers_only": [],
                    "report_removal": []
                }
                # Maps the API's severity values to the block levels used above.
                severity_levels = {
                    'suspend': 'reject',
                    'silence': 'followers_only',
                    'reject_media': 'media_removal',
                    'reject_reports': 'report_removal',
                }
                # The bare name `config` used here before is not defined in
                # this script; the configuration is read through the fba
                # module everywhere else.
                timeout = (fba.config["connection_timeout"], fba.config["read_timeout"])

                # handling CSRF, I've saw at least one server requiring it to access the endpoint
                meta = bs4.BeautifulSoup(
                    reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=timeout).text,
                    "html.parser",
                )
                try:
                    csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
                    reqheaders = {**fba.api_headers, "X-CSRF-Token": csrf}
                except (TypeError, KeyError):
                    # No such <meta> tag (find() returned None) or it has no
                    # "content" attribute: continue with the plain headers.
                    reqheaders = fba.api_headers

                blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=timeout).json()

                for block in blocks:
                    # Key order matters: entries are unpacked positionally below.
                    entry = {
                        'domain': block['domain'],
                        'hash': block['digest'],
                        'reason': block['comment']
                    }

                    level = severity_levels.get(block['severity'])
                    if level is None:
                        print("WARNING: Unknown severity:", block['severity'], block['domain'])
                    else:
                        block_lists[level].append(entry)
            except Exception:
                # Deliberate best effort: any failure of the JSON API falls
                # back to the mastodon-specific fetch provided by fba.
                block_lists = fba.get_mastodon_blocks(blocker)

            for block_level, blocks in block_lists.items():
                block_level = fba.tidyup(block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for instance in blocks:
                    # Relies on the entry's value order: domain, hash, reason.
                    blocked, blocked_hash, reason = instance.values()
                    blocked = fba.tidyup(blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blocked.count("*") < 1:
                        # No obsfucation for this instance
                        fba.cursor.execute(
                            "SELECT hash FROM instances WHERE domain = ? LIMIT 1", [blocked]
                        )

                        if fba.cursor.fetchone() is None:
                            fba.add_instance(blocked, blocker, origin)
                    else:
                        # Doing the hash search for instance names as well to tidy up DB
                        fba.cursor.execute(
                            "SELECT domain FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                        )
                        searchres = fba.cursor.fetchone()

                        if searchres is not None:
                            blocked = searchres[0]

                    # A domain that still contains more than one "*" after the
                    # lookup is stored under its hash instead of its name.
                    blocked_key = blocked if blocked.count("*") <= 1 else blocked_hash

                    fba.cursor.execute(
                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
                        (blocker, blocked_key, block_level),
                    )

                    if fba.cursor.fetchone() is None:
                        fba.block_instance(blocker, blocked_key, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({"blocked": blocked, "reason": reason})
                    else:
                        fba.update_last_seen(blocker, blocked_key, block_level)

                    if reason != "":
                        fba.update_block_reason(reason, blocker, blocked_key, block_level)

            fba.connection.commit()
        except Exception as e:
            print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
    elif software in ("friendica", "misskey", "bookwyrm", "takahe"):
        print("INFO: blocker:", blocker)
        try:
            # Start from an empty result: bookwyrm and takahe have no fetcher
            # yet and must not reuse a block list left over from a previously
            # processed blocker.
            block_lists = {}
            if software == "friendica":
                block_lists = fba.get_friendica_blocks(blocker)
            elif software == "misskey":
                block_lists = fba.get_misskey_blocks(blocker)
            elif software == "bookwyrm":
                print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker)
            elif software == "takahe":
                print("WARNING: takahe is not fully supported for fetching blacklist!", blocker)

            for block_level, blocks in block_lists.items():
                block_level = fba.tidyup(block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for instance in blocks:
                    # Relies on the entry's value order: domain, reason.
                    blocked, reason = instance.values()
                    blocked = fba.tidyup(blocked)
                    # Per-entry copies: a wildcard match below must not leak
                    # its origin/nodeinfo URL into later entries.
                    blocked_origin = origin
                    blocked_nodeinfo_url = nodeinfo_url

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    if blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without hash
                        fba.cursor.execute(
                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                        )
                        searchres = fba.cursor.fetchone()
                        if searchres is not None:
                            blocked = searchres[0]

                    if blocked.count("?") > 0:
                        # Some obscure them with question marks, not sure if that's dependent on version or not
                        fba.cursor.execute(
                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
                        )
                        searchres = fba.cursor.fetchone()
                        if searchres is not None:
                            blocked = searchres[0]
                            blocked_origin = searchres[1]
                            blocked_nodeinfo_url = searchres[2]

                    if not fba.is_instance_registered(blocked):
                        fba.add_instance(blocked, blocker, blocked_origin, blocked_nodeinfo_url)

                    # Match on block_level as well, like every other branch,
                    # so the same domain can be recorded at several levels.
                    fba.cursor.execute(
                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
                        (blocker, blocked, block_level),
                    )

                    if fba.cursor.fetchone() is None:
                        fba.block_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({"blocked": blocked, "reason": reason})
                    else:
                        fba.update_last_seen(blocker, blocked, block_level)

                    if reason != '':
                        fba.update_block_reason(reason, blocker, blocked, block_level)

            fba.connection.commit()
        except Exception as e:
            print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
    elif software == "gotosocial":
        print("INFO: blocker:", blocker)
        try:
            # Blocks
            # NOTE(review): the bare names `get_peers_url` and `config` used
            # here before are not defined in this script; this assumes the
            # fba module exposes `get_peers_url` next to `config` - confirm.
            federation = reqto.get(
                f"https://{blocker}{fba.get_peers_url}?filter=suspended",
                headers=fba.api_headers,
                timeout=(fba.config["connection_timeout"], fba.config["read_timeout"]),
            ).json()

            if federation is None:
                print("WARNING: No valid response:", blocker)
            elif "error" in federation:
                print("WARNING: API returned error:", federation["error"])
            else:
                for peer in federation:
                    blocked = fba.tidyup(peer["domain"].lower())
                    # Per-entry copies: a wildcard match below must not leak
                    # its origin/nodeinfo URL into later entries.
                    blocked_origin = origin
                    blocked_nodeinfo_url = nodeinfo_url

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blocked.count("*") > 0:
                        # GTS does not have hashes for obscured domains, so we have to guess it
                        fba.cursor.execute(
                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                        )
                        searchres = fba.cursor.fetchone()

                        if searchres is not None:
                            blocked = searchres[0]
                            blocked_origin = searchres[1]
                            blocked_nodeinfo_url = searchres[2]

                    if not fba.is_instance_registered(blocked):
                        fba.add_instance(blocked, blocker, blocked_origin, blocked_nodeinfo_url)

                    # Every entry of this list is recorded at level "reject".
                    fba.cursor.execute(
                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
                        (blocker, blocked, "reject"),
                    )

                    if fba.cursor.fetchone() is None:
                        fba.block_instance(blocker, blocked, "unknown", "reject")
                        blockdict.append({"blocked": blocked, "reason": None})
                    else:
                        fba.update_last_seen(blocker, blocked, "reject")

                    if "public_comment" in peer:
                        fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject")

                        # Also attach the reason to the pending bot entry, if any.
                        for entry in blockdict:
                            if entry["blocked"] == blocked:
                                entry["reason"] = peer["public_comment"]

                fba.connection.commit()
        except Exception as e:
            print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
    else:
        print("WARNING: Unknown software:", blocker, software)

    # Announce newly recorded "reject" blocks, if the bot is enabled.
    # NOTE(review): the bare name `send_bot_post` used here before is not
    # defined in this script; this assumes the fba module provides it - confirm.
    if fba.config["bot_enabled"] and blockdict:
        fba.send_bot_post(blocker, blockdict)

fba.connection.close()