]> git.mxchange.org Git - fba.git/blob - fba/commands.py
84ae5eddd4a9ee0dfc98e719d9cdc152eadc764c
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import argparse
18 import atoma
19 import bs4
20 import inspect
21 import itertools
22 import json
23 import re
24 import reqto
25 import sys
26 import time
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import boot
32 from fba import config
33 from fba import fba
34 from fba.network import *
35
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain could be added as a new instance.

    Exactly one status line is printed. The checks run in this order and the
    first one that applies decides the result:

    Returns:
        100 if args.domain is not a valid domain name,
        101 if it is blacklisted,
        102 if it is already registered,
        0 if none of the above applies (domain is not known yet).
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if fba.is_instance_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    # None of the rejecting conditions matched
    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
53
def fetch_bkali(args: argparse.Namespace):
    """Collect unknown domains from the gql.api.bka.li GraphQL API and fetch them.

    A "domainlist" query is posted to /v1/graphql. Every returned entry whose
    "domain" is valid, not blacklisted and not yet registered is collected.
    If anything was collected, the lock is acquired and
    fba.fetch_instances() is invoked once per collected domain.

    Any exception raised while querying or filtering (including an empty or
    malformed reply) is reported and terminates the process with exit
    status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = []
    try:
        query = json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        })
        fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", query)

        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
        # Reject replies that lack the expected data -> nodeinfo structure
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        if "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        if "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print(f"WARNING: entry does not contain 'domain' - SKIPPED!")
                continue

            if not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue

            # Blacklisted or already known domains are skipped silently
            if blacklist.is_blacklisted(entry["domain"]) or fba.is_instance_registered(entry["domain"]):
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except BaseException as e:
        print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if not domains:
        # Nothing new was found, no need to acquire the lock
        return

    boot.acquire_lock()

    print(f"INFO: Adding {len(domains)} new instances ...")
    command = inspect.currentframe().f_code.co_name
    for domain in domains:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, command)

    # DEBUG: print("DEBUG: EXIT!")
102
def _find_obscured_instance(blocked: str, wildcard: str):
    """Look up a registered instance matching an obscured domain name.

    Every occurrence of wildcard in blocked is replaced by "_", the SQL LIKE
    placeholder for a single character, and the first matching row (ordered
    by rowid) of the instances table is returned.

    Returns:
        The row (domain, origin, nodeinfo_url) or None when nothing matches.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )
    return fba.cursor.fetchone()

def fetch_blocks(args: argparse.Namespace):
    """Fetch the block lists of registered instances and store them.

    When args.domain is set (neither None nor empty) only that instance is
    checked; it must be a valid, non-blacklisted and already registered
    domain, otherwise a warning is printed and nothing happens. Without
    args.domain all instances of the selected software types are checked whose
    last_blocked is NULL or older than the configured "recheck_block" value.

    pleroma and mastodon are delegated to their own fetch_blocks() functions.
    friendica, misskey and gotosocial are handled here: each blocked domain
    is tidied up, de-obscured if it contains wildcard characters, registered
    if unknown, and then the block is either added or refreshed. Newly added
    "reject" blocks are collected and handed to the bot when "bot_enabled" is
    configured.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""
    command = inspect.currentframe().f_code.co_name

    if single_domain:
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not fba.is_instance_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Newly added "reject" blocks of this blocker, reported to the bot below
        blockdict = []
        blocker = fba.tidyup_domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        # NOTE(review): 'instances' is not imported by name in this file,
        # presumably it comes from 'from fba.network import *' - confirm.
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, software, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, software, origin, nodeinfo_url)
        elif software in ("friendica", "misskey"):
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                # The local names must not be 'json' or 'blocks': binding
                # those anywhere in this function would hide the imported
                # modules for the whole function body and break the
                # blocks.is_instance_blocked() calls below.
                if software == "friendica":
                    records = fba.fetch_friendica_blocks(blocker)
                else:
                    records = fba.fetch_misskey_blocks(blocker)

                print(f"INFO: Checking {len(records.items())} entries from blocker='{blocker}',software='{software}' ...")
                for block_level, entries in records.items():
                    # DEBUG: print("DEBUG: blocker,block_level,entries():", blocker, block_level, len(entries))
                    block_level = fba.tidyup_domain(block_level)
                    # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                    if block_level == "":
                        print("WARNING: block_level is empty, blocker:", blocker)
                        continue

                    # DEBUG: print(f"DEBUG: Checking {len(entries)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                    for block in entries:
                        # Relies on each entry holding exactly two values in
                        # the order (blocked domain, reason)
                        blocked, reason = block.values()
                        # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                        blocked = fba.tidyup_domain(blocked)
                        # DEBUG: print("DEBUG: AFTER blocked:", blocked)

                        if blocked == "":
                            print("WARNING: blocked is empty:", blocker)
                            continue
                        elif blacklist.is_blacklisted(blocked):
                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                            continue
                        elif "*" in blocked:
                            # Some friendica servers also obscure domains without hash
                            searchres = _find_obscured_instance(blocked, "*")

                            if searchres is None:
                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                                continue

                            # NOTE(review): this overwrites the blocker's
                            # origin/nodeinfo_url for all following entries
                            # of this blocker - kept as is, confirm intent.
                            blocked, origin, nodeinfo_url = searchres[0], searchres[1], searchres[2]
                        elif "?" in blocked:
                            # Some obscure them with question marks, not sure if that's dependent on version or not
                            searchres = _find_obscured_instance(blocked, "?")

                            if searchres is None:
                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                                continue

                            blocked, origin, nodeinfo_url = searchres[0], searchres[1], searchres[2]
                        elif not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue

                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        # Validate again, 'blocked' may have been replaced by
                        # the database lookup above
                        if not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue
                        elif not fba.is_instance_registered(blocked):
                            # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                            fba.add_instance(blocked, blocker, command, nodeinfo_url)

                        if not blocks.is_instance_blocked(blocker, blocked, block_level):
                            blocks.add_instance(blocker, blocked, reason, block_level)

                            if block_level == "reject":
                                blockdict.append({
                                    "blocked": blocked,
                                    "reason" : reason
                                })
                        else:
                            # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                            fba.update_last_seen(blocker, blocked, block_level)
                            blocks.update_reason(reason, blocker, blocked, block_level)

                # DEBUG: print("DEBUG: Committing changes ...")
                fba.connection.commit()
            except Exception as e:
                # Best effort: report and continue with the next blocker
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        elif software == "gotosocial":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                # Blocks
                # NOTE(review): fba.get_peers_url is interpolated as-is, this
                # only yields a usable path if it is a string - confirm.
                federation = fba.get_response(blocker, f"{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json()

                if federation is None:
                    print("WARNING: No valid response:", blocker)
                elif "error" in federation:
                    print("WARNING: API returned error:", federation["error"])
                else:
                    print(f"INFO: Checking {len(federation)} entries from blocker='{blocker}',software='{software}' ...")
                    for peer in federation:
                        blocked = peer["domain"].lower()
                        # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                        blocked = fba.tidyup_domain(blocked)
                        # DEBUG: print("DEBUG: AFTER blocked:", blocked)

                        if blocked == "":
                            print("WARNING: blocked is empty:", blocker)
                            continue
                        elif blacklist.is_blacklisted(blocked):
                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                            continue
                        elif "*" in blocked:
                            # GTS does not have hashes for obscured domains, so we have to guess it
                            searchres = _find_obscured_instance(blocked, "*")

                            if searchres is None:
                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                                continue

                            # NOTE(review): overwrites the blocker's
                            # origin/nodeinfo_url for following peers - kept
                            # as is, confirm intent.
                            blocked, origin, nodeinfo_url = searchres[0], searchres[1], searchres[2]
                        elif not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue

                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        if not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue
                        elif not fba.is_instance_registered(blocked):
                            # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
                            fba.add_instance(blocked, blocker, command, nodeinfo_url)

                        if not blocks.is_instance_blocked(blocker, blocked, "reject"):
                            # DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point")
                            blocks.add_instance(blocker, blocked, "unknown", "reject")

                            blockdict.append({
                                "blocked": blocked,
                                "reason" : None
                            })
                        else:
                            # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...")
                            fba.update_last_seen(blocker, blocked, "reject")

                        if "public_comment" in peer:
                            # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"])
                            blocks.update_reason(peer["public_comment"], blocker, blocked, "reject")

                            # Also carry the reason over to the pending bot report
                            for entry in blockdict:
                                if entry["blocked"] == blocked:
                                    # DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'")
                                    entry["reason"] = peer["public_comment"]

                    # DEBUG: print("DEBUG: Committing changes ...")
                    fba.connection.commit()
            except Exception as e:
                # Best effort: report and continue with the next blocker
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            # NOTE(review): send_bot_post is neither defined nor imported by
            # name in the visible part of this file - confirm where it comes from.
            send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
324
def fetch_cs(args: argparse.Namespace):
    """Import the silenced and blocked instances published by chaos.social.

    The page /federation on meta.chaos.social is fetched and the tables
    following the headings with id "silenced-instances" and
    "blocked-instances" are handed to find_domains(). Each resulting row is
    read by its "domain" and "reason" keys. Unknown domains are fetched via
    fba.fetch_instances() and blocks not yet recorded for 'chaos.social' are
    added with the block level "silenced" or "blocked".

    Any exception while fetching or parsing is reported and terminates the
    process with exit status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = {
        "silenced": list(),
        "blocked": list(),
    }

    try:
        doc = bs4.BeautifulSoup(
            fba.get_response("meta.chaos.social", "/federation", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
        # NOTE(review): find_domains is not defined in the visible part of
        # this file - confirm where it comes from.
        domains["silenced"].extend(find_domains(silenced))
        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
        domains["blocked"].extend(find_domains(blocked))

    except BaseException as e:
        # A missing heading makes doc.find() return None and ends up here, too
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # len(domains) would always be 2 (the number of block levels), so count
    # the rows that were actually found instead.
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        command = inspect.currentframe().f_code.co_name
        for block_level, rows in domains.items():
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in rows:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not fba.is_instance_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], None, None, command)

                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
373
def fetch_fba_rss(args: argparse.Namespace):
    """Fetch new instances announced in an FBA-specific RSS feed.

    The feed at args.feed is downloaded and parsed as RSS. The domain of
    each item is taken from the part of the item's link after the first "=".
    Domains that are blacklisted, already collected or already registered
    are skipped; for all remaining ones the lock is acquired and
    fba.fetch_instances() is invoked.

    Any exception while fetching or parsing (including an item link without
    "=") is reported and terminates the process with exit status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.get_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
            rss = atoma.parse_rss_bytes(response.content)

            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
            for item in rss.items:
                # DEBUG: print(f"DEBUG: item={item}")
                domain = item.link.split("=")[1]

                if blacklist.is_blacklisted(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                    continue
                elif domain in domains:
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                    continue
                elif fba.is_instance_registered(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                    continue

                # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
                domains.append(domain)

    except BaseException as e:
        # The feed URL lives in args.feed; there is no local 'feed' here, so
        # referencing it would raise NameError and hide the real error.
        print(f"ERROR: Cannot fetch feed='{args.feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
419
def fetch_fbabot_atom(args: argparse.Namespace):
    """Fetch new instances mentioned in the FBA bot account's ATOM feed.

    The feed https://ryona.agency/users/fba/feed.atom is downloaded and
    parsed. The content of each entry is scanned for <a> elements; every
    comma-separated part of their href is passed through
    fba.tidyup_domain(). Results that are blacklisted, already collected or
    already registered are skipped; for the remaining ones the lock is
    acquired and fba.fetch_instances() is invoked.

    Any exception while fetching or parsing is reported and terminates the
    process with exit status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"
    timeouts = (config.get("connection_timeout"), config.get("read_timeout"))

    domains = []
    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = fba.get_url(feed, fba.headers, timeouts)

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing ATOM feed ...")
            atom = atoma.parse_atom_bytes(response.content)

            # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
            for entry in atom.entries:
                # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
                doc = bs4.BeautifulSoup(entry.content.value, "html.parser")

                # Lazily walk all comma-separated href parts of all links
                hrefs = itertools.chain.from_iterable(
                    element["href"].split(",") for element in doc.findAll("a")
                )
                for href in hrefs:
                    # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                    domain = fba.tidyup_domain(href)

                    # DEBUG: print(f"DEBUG: domain='{domain}'")
                    # Skip blacklisted, duplicate and already registered ones
                    if (
                        blacklist.is_blacklisted(domain)
                        or domain in domains
                        or fba.is_instance_registered(domain)
                    ):
                        continue

                    # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                    domains.append(domain)

    except BaseException as e:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if not domains:
        # Nothing new was found, no need to acquire the lock
        return

    boot.acquire_lock()

    print(f"INFO: Adding {len(domains)} new instances ...")
    command = inspect.currentframe().f_code.co_name
    for domain in domains:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, command)

    # DEBUG: print("DEBUG: EXIT!")
472
def fetch_instances(args: argparse.Namespace):
    """Fetch instances starting at args.domain and, optionally, from known ones.

    After acquiring the lock, fba.fetch_instances() is run for args.domain.
    Unless args.single is set, it is then run for every non-blacklisted
    instance of the selected software types whose last_instance_fetch is
    NULL or older than the configured "recheck_instance" value.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        # DEBUG: print(f"DEBUG: Not fetching more instances - EXIT!")
        return

    # Loop through some instances
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: domain:", domain)
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")