]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Continued:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import argparse
18 import atoma
19 import bs4
20 import inspect
21 import itertools
22 import json
23 import re
24 import reqto
25 import sys
26 import time
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import boot
32 from fba import config
33 from fba import fba
34 from fba import instances
35
36 from fba.federation import *
37
def check_instance(args: argparse.Namespace) -> int:
    """Report whether ``args.domain`` is still unknown to this installation.

    The checks run in a fixed order and the first one that matches decides
    the result; a message describing the outcome is always printed.

    Returns:
        0 if the domain is valid, not blacklisted and not registered,
        100 if it is not a valid domain name,
        101 if it is blacklisted,
        102 if it is already registered.
    """
    domain = args.domain

    if not validators.domain(domain):
        print(f"WARNING: args.domain='{domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(domain):
        print(f"WARNING: args.domain='{domain}' is blacklisted")
        return 101

    if fba.is_instance_registered(domain):
        print(f"WARNING: args.domain='{domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{domain}' is not known")
    return 0
55
def fetch_bkali(args: argparse.Namespace):
    """Import unknown domains from the gql.api.bka.li GraphQL node list.

    Posts a ``domainlist`` query, keeps every returned domain that is valid,
    not blacklisted and not yet registered, and then runs
    ``fba.fetch_instances()`` for each of them. Any failure while fetching or
    filtering is printed and terminates the process with exit code 255.
    """
    payload = json.dumps({
        "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
    })

    candidates = []
    try:
        fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", payload)

        # Malformed replies are turned into exceptions so that they end up in
        # the common error path below.
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        if "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        if "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for record in fetched["data"]["nodeinfo"]:
            if "domain" not in record:
                print("WARNING: entry does not contain 'domain' - SKIPPED!")
                continue

            name = record["domain"]
            if not validators.domain(name):
                print(f"WARNING: domain='{name}' is not a valid domain - SKIPPED!")
                continue

            # Blacklisted or already known domains are dropped silently.
            if blacklist.is_blacklisted(name) or fba.is_instance_registered(name):
                continue

            candidates.append(name)

    except BaseException as error:
        print(f"ERROR: Cannot fetch graphql,exception[{type(error)}]:'{str(error)}'")
        sys.exit(255)

    if not candidates:
        return

    boot.acquire_lock()

    print(f"INFO: Adding {len(candidates)} new instances ...")
    # Name of this command, handed to fba.fetch_instances() with every call.
    command = inspect.currentframe().f_code.co_name
    for name in candidates:
        print(f"INFO: Fetching instances from domain='{name}' ...")
        fba.fetch_instances(name, None, None, command)
104
def _find_obfuscated_instance(blocked: str, wildcard: str):
    """Return the first registered instance matching an obfuscated domain.

    Every ``wildcard`` character in ``blocked`` is replaced by ``_`` (the SQL
    LIKE placeholder for exactly one character) before the lookup.

    Returns:
        The ``(domain, origin, nodeinfo_url)`` row with the lowest rowid, or
        ``None`` if no registered instance matches.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )
    return fba.cursor.fetchone()


def _resolve_blocked_domain(blocked: str, blocker: str, software: str, nodeinfo_url, command: str, wildcards: tuple):
    """Tidy up, de-obfuscate, validate and register one blocked domain.

    Args:
        blocked: Domain as reported by the blocker, possibly obfuscated.
        blocker: Domain of the instance publishing the block.
        software: Software name of the blocker (only used in messages).
        nodeinfo_url: Value handed to ``fba.add_instance()`` when the domain
            was not obfuscated; for de-obfuscated domains the value of the
            matched instance row is used instead.
        command: Name of the calling command, passed to ``fba.add_instance()``.
        wildcards: Obfuscation characters to try, in order; only the first
            one contained in the domain is used.

    Returns:
        The resolved domain, or ``None`` if the entry has to be skipped.
    """
    blocked = fba.tidyup_domain(blocked)

    if blocked == "":
        print("WARNING: blocked is empty:", blocker)
        return None
    if blacklist.is_blacklisted(blocked):
        return None

    for wildcard in wildcards:
        if blocked.count(wildcard) > 0:
            row = _find_obfuscated_instance(blocked, wildcard)
            if row is None:
                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                return None

            # Only this entry uses the matched row; the caller's values stay
            # untouched for the remaining entries of the same blocker.
            blocked = row[0]
            nodeinfo_url = row[2]
            break

    if not validators.domain(blocked):
        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
        return None

    if not fba.is_instance_registered(blocked):
        fba.add_instance(blocked, blocker, command, nodeinfo_url)

    return blocked


def _fetch_levelled_blocks(blocker: str, software: str, nodeinfo_url, command: str, blockdict: list):
    """Store the block lists of one Friendica or Misskey instance.

    New blocks of level ``reject`` are appended to ``blockdict`` in place, so
    entries collected before an exception are still visible to the caller.
    Changes are committed once all levels were processed.
    """
    if software == "friendica":
        levels = fba.fetch_friendica_blocks(blocker)
    else:
        levels = fba.fetch_misskey_blocks(blocker)

    print(f"INFO: Checking {len(levels.items())} entries from blocker='{blocker}',software='{software}' ...")
    for block_level, entries in levels.items():
        block_level = fba.tidyup_domain(block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        for block in entries:
            # NOTE(review): assumes every entry is a mapping with exactly two
            # values in the order (blocked domain, reason) - confirm against
            # fba.fetch_friendica_blocks() / fba.fetch_misskey_blocks().
            blocked, reason = block.values()

            blocked = _resolve_blocked_domain(blocked, blocker, software, nodeinfo_url, command, ("*", "?"))
            if blocked is None:
                continue

            if not blocks.is_instance_blocked(blocker, blocked, block_level):
                blocks.add_instance(blocker, blocked, reason, block_level)

                if block_level == "reject":
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : reason
                    })
            else:
                blocks.update_last_seen(blocker, blocked, block_level)
                blocks.update_reason(reason, blocker, blocked, block_level)

    fba.connection.commit()


def _fetch_gotosocial_blocks(blocker: str, software: str, nodeinfo_url, command: str, blockdict: list):
    """Store the suspended peers of one GoToSocial instance as ``reject`` blocks.

    New blocks are appended to ``blockdict`` in place. Changes are committed
    only when the API returned a usable peer list.
    """
    federation = fba.get_response(blocker, f"{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json()

    if federation is None:
        print("WARNING: No valid response:", blocker)
        return
    if "error" in federation:
        print("WARNING: API returned error:", federation["error"])
        return

    print(f"INFO: Checking {len(federation)} entries from blocker='{blocker}',software='{software}' ...")
    for peer in federation:
        # GoToSocial publishes no hashes for obscured domains, so "*" is the
        # only obfuscation that can be resolved here.
        blocked = _resolve_blocked_domain(peer["domain"].lower(), blocker, software, nodeinfo_url, command, ("*",))
        if blocked is None:
            continue

        if not blocks.is_instance_blocked(blocker, blocked, "reject"):
            # The reason is not known yet; it is filled in below if the peer
            # carries a public comment.
            blocks.add_instance(blocker, blocked, "unknown", "reject")

            blockdict.append({
                "blocked": blocked,
                "reason" : None
            })
        else:
            blocks.update_last_seen(blocker, blocked, "reject")

        if "public_comment" in peer:
            blocks.update_reason(peer["public_comment"], blocker, blocked, "reject")

            for entry in blockdict:
                if entry["blocked"] == blocked:
                    entry["reason"] = peer["public_comment"]

    fba.connection.commit()


def fetch_blocks(args: argparse.Namespace):
    """Fetch and store the block lists of registered instances.

    With a non-empty ``args.domain`` only that instance is checked; it must
    be a valid, non-blacklisted and already registered domain, otherwise a
    warning is printed and nothing is done. Without a domain every supported
    instance whose ``last_blocked`` is unset or older than the configured
    ``recheck_block`` interval is checked.

    Pleroma and Mastodon are delegated to their federation modules;
    Friendica, Misskey and GoToSocial are handled here. Errors while handling
    one of those three are printed and the loop continues with the next
    blocker. If ``bot_enabled`` is set, newly found blocks collected for a
    blocker are passed to ``send_bot_post()``.
    """
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not fba.is_instance_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")

    # Name of this command; the helpers pass it on to fba.add_instance().
    command = inspect.currentframe().f_code.co_name

    for blocker, software, origin, nodeinfo_url in rows:
        # New blocks found for this blocker, reported to the bot afterwards.
        blockdict = []
        blocker = fba.tidyup_domain(blocker)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                _fetch_levelled_blocks(blocker, software, nodeinfo_url, command, blockdict)
            except Exception as e:
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        elif software == "gotosocial":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                _fetch_gotosocial_blocks(blocker, software, nodeinfo_url, command, blockdict)
            except Exception as e:
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        else:
            # The queries above also select 'bookwyrm' and 'takahe', which
            # have no handler here and therefore end up in this branch.
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            # NOTE(review): send_bot_post is not defined in the visible part
            # of this module; presumably it comes from the wildcard import of
            # fba.federation or should be fba.send_bot_post - confirm.
            send_bot_post(blocker, blockdict)
326
def fetch_cs(args: argparse.Namespace):
    """Import the silenced and blocked instances published by chaos.social.

    Downloads ``/federation`` from meta.chaos.social, reads the tables that
    follow the ``silenced-instances`` and ``blocked-instances`` headings and
    records every row as a block issued by ``chaos.social`` with the matching
    block level. Domains that are not registered yet are fetched first.

    Any error while downloading or parsing the page is printed and terminates
    the process with exit code 255.
    """
    blocker = "chaos.social"
    domains = {
        "silenced": [],
        "blocked": [],
    }

    try:
        doc = bs4.BeautifulSoup(
            fba.get_response("meta.chaos.social", "/federation", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )

        # NOTE(review): find_domains is not defined in the visible part of
        # this module; presumably it is provided by the wildcard import of
        # fba.federation or should be fba.find_domains - confirm.
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")
        domains["silenced"] = domains["silenced"] + find_domains(silenced)

        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")
        domains["blocked"] = domains["blocked"] + find_domains(blocked)

    except Exception as e:
        # Only real errors are handled here; KeyboardInterrupt and SystemExit
        # are left to propagate.
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # Count the scraped rows. The dict itself always holds two keys, so its
    # length says nothing about whether anything was found.
    total = sum(len(rows) for rows in domains.values())
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        # Name of this command, handed to fba.fetch_instances().
        command = inspect.currentframe().f_code.co_name
        for block_level, rows in domains.items():
            for row in rows:
                if not fba.is_instance_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], None, None, command)

                if not blocks.is_instance_blocked(blocker, row["domain"], block_level):
                    blocks.add_instance(blocker, row["domain"], row["reason"], block_level)

        fba.connection.commit()
375
def fetch_fba_rss(args: argparse.Namespace):
    """Import unknown domains from an FBA-specific RSS feed.

    Downloads ``args.feed``, takes the text after the first ``=`` of every
    item link as the domain and keeps those that are not blacklisted, not
    already collected and not yet registered. ``fba.fetch_instances()`` is
    then run for each collected domain.

    Any error while downloading or parsing the feed is printed and terminates
    the process with exit code 255.
    """
    domains = []

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.get_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.status_code < 300 and len(response.text) > 0:
            rss = atoma.parse_rss_bytes(response.content)

            for item in rss.items:
                # The domain is expected as the value of the link's first
                # query parameter; a link without "=" raises IndexError and
                # is reported through the handler below.
                domain = item.link.split("=")[1]

                if blacklist.is_blacklisted(domain):
                    continue
                elif domain in domains:
                    continue
                elif fba.is_instance_registered(domain):
                    continue

                domains.append(domain)

    except Exception as e:
        # The feed URL lives in args.feed; there is no local name "feed" in
        # this function. KeyboardInterrupt/SystemExit are left to propagate.
        print(f"ERROR: Cannot fetch feed='{args.feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        # Name of this command, handed to fba.fetch_instances().
        command = inspect.currentframe().f_code.co_name
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, command)
421
def fetch_fbabot_atom(args: argparse.Namespace):
    """Import unknown domains mentioned in the FBA bot account's ATOM feed.

    Each feed entry is parsed as HTML; the ``href`` of every anchor is split
    at commas and each part is tidied up into a domain. Domains that are not
    blacklisted, not already collected and not yet registered are then passed
    to ``fba.fetch_instances()``. Any failure while fetching or parsing is
    printed and terminates the process with exit code 255.
    """
    feed = "https://ryona.agency/users/fba/feed.atom"
    found = []

    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        timeout = (config.get("connection_timeout"), config.get("read_timeout"))
        response = fba.get_url(feed, fba.headers, timeout)

        # Nothing to import when the request failed or returned no body.
        if not response.ok or response.status_code >= 300 or len(response.text) == 0:
            return

        atom = atoma.parse_atom_bytes(response.content)
        for entry in atom.entries:
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")

            for anchor in doc.findAll("a"):
                for href in anchor["href"].split(","):
                    domain = fba.tidyup_domain(href)

                    # Checked in this order: blacklist, duplicates within
                    # this run, already registered instances.
                    if blacklist.is_blacklisted(domain) or domain in found or fba.is_instance_registered(domain):
                        continue

                    found.append(domain)

    except BaseException as error:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(error)}]:'{str(error)}'")
        sys.exit(255)

    if not found:
        return

    boot.acquire_lock()

    print(f"INFO: Adding {len(found)} new instances ...")
    # Name of this command, handed to fba.fetch_instances() with every call.
    command = inspect.currentframe().f_code.co_name
    for domain in found:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, command)
474
def fetch_instances(args: argparse.Namespace):
    """Fetch instances starting at ``args.domain`` and re-check known ones.

    After the initial fetch for ``args.domain`` the function returns when
    ``args.single`` is set. Otherwise every supported instance whose
    ``last_instance_fetch`` is unset or older than the configured
    ``recheck_instance`` interval is fetched as well, newest rowid first;
    blacklisted domains are skipped with a warning.
    """
    boot.acquire_lock()

    # Name of this command, handed to fba.fetch_instances() with every call.
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        return

    # Everything last fetched before this timestamp is due again.
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    pending = fba.cursor.fetchall()
    print(f"INFO: Checking {len(pending)} entries ...")
    for domain, origin, software, nodeinfo_url in pending:
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)