]> git.mxchange.org Git - fba.git/blob - fba/commands.py
WIP:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import argparse
18 import atoma
19 import bs4
20 import inspect
21 import itertools
22 import json
23 import re
24 import reqto
25 import sys
26 import time
27 import validators
28
29 from fba import boot
30 from fba import config
31 from fba import fba
32 from fba.network import *
33
def check_instance(args: argparse.Namespace) -> int:
    """Report whether ``args.domain`` is still unknown to the database.

    Prints one line describing the outcome and returns a status code:

    * ``0``   - domain is valid, not blacklisted and not registered yet
    * ``100`` - domain failed ``validators.domain()``
    * ``101`` - domain is blacklisted
    * ``102`` - domain is already registered
    """
    # The checks are ordered; the first one that matches decides the status.
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if fba.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if fba.is_instance_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
51
def fetch_bkali(args: argparse.Namespace):
    """Fetch the domain list from gql.api.bka.li and crawl the unknown ones.

    The GraphQL response must contain ``data.nodeinfo``, a list of entries
    with a ``domain`` key. Entries that are malformed, invalid, blacklisted
    or already registered are skipped. Any failure while fetching or
    filtering is reported and terminates the process with exit code 255.

    ``args`` is not read by this command.
    """
    new_domains = []

    try:
        payload = json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        })
        fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", payload)

        # Malformed responses are raised here and reported by the handler below.
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        if "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        if "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            if "domain" not in entry:
                print("WARNING: entry does not contain 'domain' - SKIPPED!")
                continue

            candidate = entry["domain"]
            if not validators.domain(candidate):
                print(f"WARNING: domain='{candidate}' is not a valid domain - SKIPPED!")
                continue

            # Blacklisted and already registered domains are skipped silently.
            if fba.is_blacklisted(candidate) or fba.is_instance_registered(candidate):
                continue

            new_domains.append(candidate)

    except BaseException as e:
        print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    if len(new_domains) == 0:
        return

    boot.acquire_lock()

    # Name of this command, handed to fba.fetch_instances() with each domain.
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(new_domains)} new instances ...")
    for domain in new_domains:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, command)
100
def _lookup_obscured_domain(blocked: str, wildcard: str):
    """Search the instances table for a domain matching an obscured name.

    Every ``wildcard`` character in ``blocked`` is replaced by ``_`` (the
    single-character placeholder of SQL ``LIKE``) before querying.

    Returns the result of ``fetchone()``: the first matching
    ``(domain, origin, nodeinfo_url)`` row in ``rowid`` order, which the
    caller compares against ``None`` to detect "no match".
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )
    return fba.cursor.fetchone()


def _resolve_blocked(blocker: str, blocked: str, software: str, nodeinfo_url, wildcards: tuple):
    """Normalise one blocked domain and decide whether it can be recorded.

    ``wildcards`` lists the characters this software uses to obscure domain
    names, in the order they are tried; only the first one found in the
    name is used for the lookup.

    Returns ``(domain, nodeinfo_url)`` or ``None`` when the entry has to be
    skipped. ``nodeinfo_url`` is the value passed in unless the domain was
    de-obscured, in which case it is the one stored for the matched instance.
    """
    blocked = fba.tidyup_domain(blocked)

    if blocked == "":
        print("WARNING: blocked is empty:", blocker)
        return None
    if fba.is_blacklisted(blocked):
        return None

    for wildcard in wildcards:
        if blocked.count(wildcard) > 0:
            found = _lookup_obscured_domain(blocked, wildcard)
            if found is None:
                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                return None

            # Keep the looked-up values local to this entry so they cannot
            # leak into the handling of the blocker's remaining entries.
            blocked, nodeinfo_url = found[0], found[2]
            break

    # Covers both plain names and names that were just de-obscured.
    if not validators.domain(blocked):
        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
        return None

    return blocked, nodeinfo_url


def _process_level_blocks(blocker: str, software: str, nodeinfo_url, command: str, blockdict: list):
    """Record the blocks of a friendica or misskey instance.

    The fetched data is iterated as ``{block_level: [block, ...]}`` where
    each ``block`` must yield exactly two values, the blocked domain and the
    reason, in that order.

    New ``reject`` blocks are appended to ``blockdict`` in place, so entries
    collected before an exception are still available to the caller.
    Changes are committed once all levels were processed.
    """
    if software == "friendica":
        records = fba.fetch_friendica_blocks(blocker)
    else:
        records = fba.fetch_misskey_blocks(blocker)

    print(f"INFO: Checking {len(records.items())} entries from blocker='{blocker}',software='{software}' ...")
    for block_level, blocks in records.items():
        block_level = fba.tidyup_domain(block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        for block in blocks:
            blocked, reason = block.values()

            # Friendica obscures with "*", some servers use "?" instead.
            resolved = _resolve_blocked(blocker, blocked, software, nodeinfo_url, ("*", "?"))
            if resolved is None:
                continue
            blocked, entry_nodeinfo_url = resolved

            if not fba.is_instance_registered(blocked):
                fba.add_instance(blocked, blocker, command, entry_nodeinfo_url)

            if not fba.is_instance_blocked(blocker, blocked, block_level):
                fba.block_instance(blocker, blocked, reason, block_level)

                if block_level == "reject":
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : reason
                    })
            else:
                fba.update_last_seen(blocker, blocked, block_level)
                fba.update_block_reason(reason, blocker, blocked, block_level)

    fba.connection.commit()


def _process_gotosocial_blocks(blocker: str, software: str, nodeinfo_url, command: str, blockdict: list):
    """Record the suspended peers of a gotosocial instance as ``reject`` blocks.

    Every new block is appended to ``blockdict`` in place (reason ``None``
    until a ``public_comment`` is seen), so entries collected before an
    exception are still available to the caller. Changes are committed only
    when the API delivered a usable peer list.
    """
    # NOTE(review): fba.get_peers_url is interpolated as-is; presumably it is
    # a path constant - confirm it is not a callable.
    federation = fba.get_response(blocker, f"{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json()

    if federation is None:
        print("WARNING: No valid response:", blocker)
        return
    if "error" in federation:
        print("WARNING: API returned error:", federation["error"])
        return

    print(f"INFO: Checking {len(federation)} entries from blocker='{blocker}',software='{software}' ...")
    for peer in federation:
        # GTS has no hashes for obscured domains, so "*" names are guessed.
        resolved = _resolve_blocked(blocker, peer["domain"].lower(), software, nodeinfo_url, ("*",))
        if resolved is None:
            continue
        blocked, entry_nodeinfo_url = resolved

        if not fba.is_instance_registered(blocked):
            fba.add_instance(blocked, blocker, command, entry_nodeinfo_url)

        if not fba.is_instance_blocked(blocker, blocked, "reject"):
            # The reason is not known yet; it is filled in below if provided.
            fba.block_instance(blocker, blocked, "unknown", "reject")

            blockdict.append({
                "blocked": blocked,
                "reason" : None
            })
        else:
            fba.update_last_seen(blocker, blocked, "reject")

        if "public_comment" in peer:
            fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject")

            for entry in blockdict:
                if entry["blocked"] == blocked:
                    entry["reason"] = peer["public_comment"]

    fba.connection.commit()


def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and store them.

    With a non-empty ``args.domain`` only that instance is checked; it must
    be a valid, non-blacklisted and registered domain, otherwise a warning
    is printed and nothing happens. Without it, all instances whose
    ``last_blocked`` is unset or older than the ``recheck_block`` setting
    are checked.

    pleroma and mastodon are delegated to their own ``fetch_blocks()``;
    friendica, misskey and gotosocial are handled here. Errors while
    handling one of those three are printed and the next blocker is
    processed. bookwyrm and takahe are selected by the queries but have no
    handler, so they end up in the "Unknown software" warning.

    When the ``bot_enabled`` setting is truthy, newly found ``reject``
    blocks of a blocker are passed to ``send_bot_post()``.
    """
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif fba.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not fba.is_instance_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")

    # Resolved here, not in the helpers, so add_instance() keeps receiving
    # this command's name.
    command = inspect.currentframe().f_code.co_name

    for blocker, software, origin, nodeinfo_url in rows:
        # Newly found "reject" blocks of this blocker, for the bot post.
        blockdict = []
        blocker = fba.tidyup_domain(blocker)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif fba.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        fba.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, software, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, software, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                _process_level_blocks(blocker, software, nodeinfo_url, command, blockdict)
            except Exception as e:
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        elif software == "gotosocial":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                _process_gotosocial_blocks(blocker, software, nodeinfo_url, command, blockdict)
            except Exception as e:
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            send_bot_post(blocker, blockdict)
322
def fetch_cs(args: argparse.Namespace):
    """Import the federation block list published by chaos.social.

    The page ``/federation`` on meta.chaos.social is parsed: the table
    following the ``silenced-instances`` heading and the one following the
    ``blocked-instances`` heading are handed to ``find_domains()``. Each
    returned row is read through its ``domain`` and ``reason`` keys.

    Unregistered domains are crawled first, then missing blocks are stored
    with ``chaos.social`` as blocker and the heading's level (``silenced``
    or ``blocked``) as block level. Any failure while fetching or parsing
    is reported and terminates the process with exit code 255.

    ``args`` is not read by this command.
    """
    domains = {
        "silenced": list(),
        "blocked": list(),
    }

    try:
        doc = bs4.BeautifulSoup(
            fba.get_response("meta.chaos.social", "/federation", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )

        # The heading ids are "<block_level>-instances".
        for block_level in ("silenced", "blocked"):
            table = doc.find("h2", {"id": f"{block_level}-instances"}).findNext("table")
            domains[block_level] = domains[block_level] + find_domains(table)

    except BaseException as e:
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # Count the scraped rows; len(domains) would only count the two block
    # levels and therefore always be 2.
    total = sum(len(rows) for rows in domains.values())
    if total > 0:
        boot.acquire_lock()

        # Name of this command, handed to fba.fetch_instances().
        command = inspect.currentframe().f_code.co_name

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            for row in domains[block_level]:
                if not fba.is_instance_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], None, None, command)

                if not fba.is_instance_blocked('chaos.social', row["domain"], block_level):
                    fba.block_instance('chaos.social', row["domain"], row["reason"], block_level)

        fba.connection.commit()
371
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl unknown domains announced in an FBA-specific RSS feed.

    The feed at ``args.feed`` is downloaded and parsed. The domain of each
    item is the part of its link after the first ``=``. Domains that are
    blacklisted, already collected or already registered are skipped.
    Any failure while fetching or parsing is reported and terminates the
    process with exit code 255.
    """
    # Bound up front so the error handler below can name the feed; it used
    # to reference an undefined name and failed with NameError instead.
    feed = args.feed
    domains = list()

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.get_url(feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.status_code < 300 and len(response.text) > 0:
            rss = atoma.parse_rss_bytes(response.content)

            for item in rss.items:
                # A link without "=" raises IndexError and aborts the run
                # through the handler below.
                domain = item.link.split("=")[1]

                if fba.is_blacklisted(domain):
                    continue
                elif domain in domains:
                    continue
                elif fba.is_instance_registered(domain):
                    continue

                domains.append(domain)

    except BaseException as e:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    if len(domains) > 0:
        boot.acquire_lock()

        # Name of this command, handed to fba.fetch_instances().
        command = inspect.currentframe().f_code.co_name

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, command)
417
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl unknown domains mentioned in the FBA bot account's ATOM feed.

    Each feed entry's HTML content is scanned for ``<a>`` elements; their
    ``href`` values are split at commas and every part is cleaned with
    ``fba.tidyup_domain()``. Domains that are blacklisted, already collected
    or already registered are skipped. Any failure while fetching or parsing
    is reported and terminates the process with exit code 255.

    ``args`` is not read by this command.
    """
    feed = "https://ryona.agency/users/fba/feed.atom"
    timeouts = (config.get("connection_timeout"), config.get("read_timeout"))

    collected = []
    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = fba.get_url(feed, fba.headers, timeouts)

        if response.ok and response.status_code < 300 and len(response.text) > 0:
            atom = atoma.parse_atom_bytes(response.content)

            for entry in atom.entries:
                content = bs4.BeautifulSoup(entry.content.value, "html.parser")

                for anchor in content.findAll("a"):
                    for href in anchor["href"].split(","):
                        domain = fba.tidyup_domain(href)

                        # Same order as a chain of checks: blacklist first,
                        # then duplicates, then the database lookup.
                        if (fba.is_blacklisted(domain)
                                or domain in collected
                                or fba.is_instance_registered(domain)):
                            continue

                        collected.append(domain)

    except BaseException as e:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    if len(collected) == 0:
        return

    boot.acquire_lock()

    # Name of this command, handed to fba.fetch_instances() with each domain.
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(collected)} new instances ...")
    for domain in collected:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, command)
470
def fetch_instances(args: argparse.Namespace):
    """Crawl ``args.domain`` and, unless ``args.single`` is set, known instances.

    After the initial fetch of ``args.domain`` every registered instance of
    a supported software whose ``last_instance_fetch`` is unset or older
    than the ``recheck_instance`` setting is fetched as well, newest rows
    first. Blacklisted domains are skipped with a warning.
    """
    boot.acquire_lock()

    # Name of this command, handed to every fba.fetch_instances() call.
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        return

    # Instances that are due for a re-check
    oldest_allowed = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [oldest_allowed]
    )

    due = fba.cursor.fetchall()
    print(f"INFO: Checking {len(due)} entries ...")
    for domain, origin, software, nodeinfo_url in due:
        if fba.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)