]> git.mxchange.org Git - fba.git/blob - fba/commands.py
401f6a4701bc089f3b8f47c02ded8125e10a6d83
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import sys
21 import time
22
23 import argparse
24 import atoma
25 import bs4
26 import markdown
27 import reqto
28 import requests
29 import validators
30
31 from fba import blacklist
32 from fba import blocks
33 from fba import config
34 from fba import federation
35 from fba import fba
36 from fba import instances
37 from fba import locking
38 from fba import network
39
40 from fba.helpers import tidyup
41
42 from fba.networks import friendica
43 from fba.networks import mastodon
44 from fba.networks import misskey
45 from fba.networks import pleroma
46
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain is eligible to be added as a new instance.

    Exactly one line is printed describing the outcome.

    Returns:
        0   - the domain is valid, not blacklisted and not yet registered
        100 - the domain failed validators.domain()
        101 - the domain is blacklisted
        102 - the domain is already registered
    """
    # The checks run in this order on purpose: validity first, then the
    # blacklist, then the registry. The first failing check decides the code.
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
64
def fetch_bkali(args: argparse.Namespace):
    """Import not-yet-known domains listed by the gql.api.bka.li GraphQL API.

    The API is asked for its full domain list. Entries that lack a "domain"
    key, are not valid domain names, are blacklisted or are already
    registered get skipped. For every remaining domain
    federation.fetch_instances() is invoked while holding the lock.

    A network exception while querying the API terminates the process with
    exit status 255. `args` is accepted for the common command signature
    but is not read here.
    """
    # GraphQL request body; it is static, so build it before doing any I/O.
    payload = json.dumps({
        "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
    })

    domains = []
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", payload)

        # Sanity-check the response envelope before walking into it. These
        # are plain Exceptions, so unless network.exceptions covers them
        # they propagate to the caller.
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        if "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        if "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue

            if not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue

            # Blacklisted or already known domains are skipped silently.
            if blacklist.is_blacklisted(entry["domain"]) or instances.is_registered(entry["domain"]):
                continue

            domains.append(entry["domain"])

    except network.exceptions as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    if len(domains) == 0:
        # Nothing new, so the lock is not needed.
        return

    locking.acquire()

    # Name of this command, handed to fetch_instances() as the caller tag.
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(domains)} new instances ...")
    for domain in domains:
        try:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            federation.fetch_instances(domain, None, None, command)
        except network.exceptions as exception:
            # One failing domain must not abort the whole import.
            print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
            instances.update_last_error(domain, exception)
117
def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and record them.

    When args.domain is set, only that domain is re-checked; it must be a
    valid, non-blacklisted and already registered domain, otherwise the
    command returns early with a warning. Without args.domain every
    instance running one of the supported software types whose
    last_blocked is NULL or older than the configured "recheck_block"
    interval is processed.

    Pleroma and Mastodon blockers are handed to their network modules.
    Friendica and Misskey block lists are processed here: each entry is
    tidied, de-obfuscated where possible, registered as an instance if
    unknown, and then added as a block or refreshed (last seen + reason).
    Newly added "reject" blocks are collected and, if "bot_enabled" is
    configured, posted through network.send_bot_post().
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Newly added "reject" blocks of this blocker, for the bot post below.
        blockdict = list()
        blocker = tidyup.domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        # Stamped before fetching, so a failing blocker is not retried on
        # every run.
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")

            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            else:
                blocking = misskey.fetch_blocks(blocker)

            # blocking is used as a mapping of block level -> list of entries.
            print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                block_level = tidyup.domain(block_level)
                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                for block in blocklist:
                    # Relies on each entry holding exactly two values in the
                    # order (blocked domain, reason).
                    blocked, reason = block.values()
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                    # Per-entry value handed to instances.add(). It is kept
                    # separate from the blocker's own nodeinfo_url so that a
                    # de-obfuscation hit cannot leak into later entries.
                    # NOTE(review): for plain domains this stays the
                    # *blocker's* nodeinfo_url, as before - confirm that
                    # instances.add() really wants that for the blocked domain.
                    blocked_nodeinfo_url = nodeinfo_url

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                        continue
                    elif "*" in blocked or "?" in blocked:
                        # Some servers obscure domains with "*" or "?". Both
                        # are mapped to the SQL LIKE single-character wildcard
                        # and the oldest matching known instance is taken.
                        fba.cursor.execute(
                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_").replace("?", "_")]
                        )

                        searchres = fba.cursor.fetchone()

                        # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'")
                        if searchres is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                            continue

                        blocked              = searchres[0]
                        blocked_nodeinfo_url = searchres[2]

                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                    if not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                        continue
                    elif not instances.is_registered(blocked):
                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                        instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

            # DEBUG: print("DEBUG: Committing changes ...")
            fba.connection.commit()
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
267
def fetch_cs(args: argparse.Namespace):
    """Import the silenced/blocked instance lists published by chaos.social.

    The federation.md document from the chaossocial/meta repository is
    fetched, rendered from Markdown to HTML and the tables following the
    "silenced-instances" and "blocked-instances" headings are handed to
    federation.find_domains(). Every returned row (read via its "domain"
    and "reason" keys) is recorded as a block by 'chaos.social' with block
    level "silenced" or "reject" respectively, and instances that are not
    yet registered are fetched.

    `args` is accepted for the common command signature but is not read.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    # Markdown extensions enabled for rendering; the heading ids looked up
    # below are presumably generated by one of them ('toc') - TODO confirm.
    extensions = [
        'extra',
        'abbr',
        'attr_list',
        'def_list',
        'fenced_code',
        'footnotes',
        'md_in_html',
        'admonition',
        'codehilite',
        'legacy_attrs',
        'legacy_em',
        'meta',
        'nl2br',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks'
    ]

    # Block level -> rows found for that level
    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
    domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
    domains["reject"] = domains["reject"] + federation.find_domains(blocked)

    # Count the rows themselves. len(domains) is the number of block levels
    # (always 2), so it cannot tell whether anything was found.
    total = sum(len(found) for found in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    try:
                        print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        # One failing domain must not abort the whole import.
                        print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{row['domain']}'")
                        instances.update_last_error(row["domain"], exception)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
335
def fetch_fba_rss(args: argparse.Namespace):
    """Import unknown domains from an FBA-specific RSS feed (args.feed).

    Each feed item's link is split on "=" and the second piece is taken as
    the domain. Domains that are blacklisted, already collected in this
    run, or already registered are skipped; for the rest
    federation.fetch_instances() is invoked while holding the lock.
    """
    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    timeouts = (config.get("connection_timeout"), config.get("read_timeout"))
    response = fba.fetch_url(args.feed, network.web_headers, timeouts)

    # An unsuccessful or empty response yields no domains, so there is
    # nothing further to do.
    if not response.ok or response.status_code >= 300 or len(response.text) == 0:
        return

    domains = []
    rss = atoma.parse_rss_bytes(response.content)
    for item in rss.items:
        # The domain is whatever follows the first "=" in the item link.
        domain = item.link.split("=")[1]

        skip = (
            blacklist.is_blacklisted(domain)
            or domain in domains
            or instances.is_registered(domain)
        )
        if not skip:
            domains.append(domain)

    if len(domains) == 0:
        return

    locking.acquire()

    # Name of this command, handed to fetch_instances() as the caller tag.
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(domains)} new instances ...")
    for domain in domains:
        try:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            federation.fetch_instances(domain, None, None, command)
        except network.exceptions as exception:
            # Record the failure and carry on with the next domain.
            print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
            instances.update_last_error(domain, exception)
380
def fetch_fbabot_atom(args: argparse.Namespace):
    """Import unknown domains mentioned in the FBA bot account's ATOM feed.

    Every entry's HTML content is scanned for <a> elements; each href is
    split on "," and every piece is passed through tidyup.domain(). Domains
    that are blacklisted, already collected in this run, or already
    registered are skipped; for the rest federation.fetch_instances() is
    invoked while holding the lock.

    `args` is accepted for the common command signature but is not read.
    """
    feed = "https://ryona.agency/users/fba/feed.atom"

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    timeouts = (config.get("connection_timeout"), config.get("read_timeout"))
    response = fba.fetch_url(feed, network.web_headers, timeouts)

    # An unsuccessful or empty response yields no domains, so there is
    # nothing further to do.
    if not response.ok or response.status_code >= 300 or len(response.text) == 0:
        return

    domains = []
    atom = atoma.parse_atom_bytes(response.content)
    for entry in atom.entries:
        doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
        for element in doc.findAll("a"):
            # A single href may carry several comma-separated values.
            for href in element["href"].split(","):
                domain = tidyup.domain(href)

                skip = (
                    blacklist.is_blacklisted(domain)
                    or domain in domains
                    or instances.is_registered(domain)
                )
                if not skip:
                    domains.append(domain)

    if len(domains) == 0:
        return

    locking.acquire()

    # Name of this command, handed to fetch_instances() as the caller tag.
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(domains)} new instances ...")
    for domain in domains:
        try:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            federation.fetch_instances(domain, None, None, command)
        except network.exceptions as exception:
            # Record the failure and carry on with the next domain.
            print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
            instances.update_last_error(domain, exception)
433
def fetch_instances(args: argparse.Namespace):
    """Fetch instances starting at args.domain, then re-crawl stale ones.

    The initial fetch targets args.domain; if it fails with a network
    exception the error is recorded and the command stops. With args.single
    set, nothing else is done. Otherwise every instance running one of the
    listed software types whose last_instance_fetch is NULL or older than
    the configured "recheck_instance" interval is fetched, newest rowid
    first, skipping blacklisted domains.
    """
    locking.acquire()

    # Name of this command, handed to fetch_instances() as the caller tag.
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    try:
        print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
        federation.fetch_instances(args.domain, None, None, command)
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during fetching instances from args.domain='{args.domain}'")
        instances.update_last_error(args.domain, exception)
        return

    if args.single:
        # Caller asked for the one domain only.
        return

    # Everything not fetched since this point in time is due again.
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for row in rows:
        # Columns: 0=domain, 1=origin, 2=software, 3=nodeinfo_url
        if blacklist.is_blacklisted(row[0]):
            print("WARNING: domain is blacklisted:", row[0])
            continue

        try:
            print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
            federation.fetch_instances(row[0], row[1], row[2], command, row[3])
        except network.exceptions as exception:
            # Record the failure and carry on with the next row.
            print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{row[0]}'")
            instances.update_last_error(row[0], exception)
472
def fetch_federater(args: argparse.Namespace):
    """Import unknown domains from the federater recommended-blocks CSV.

    The CSV is downloaded from GitHub and parsed with csv.DictReader; the
    domain is read from the "#domain" column. Rows whose domain is not a
    valid domain name, is blacklisted or is already registered are skipped;
    for the rest federation.fetch_instances() is invoked. The lock is
    acquired before the download.

    `args` is accepted for the common command signature but is not read.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    locking.acquire()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
    # response.content is bytes, so comparing it against the str "" is
    # always unequal; check the length to actually detect an empty body.
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
        for row in reader:
            domain = row["#domain"]

            if not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - skipped!")
                continue

            try:
                print(f"INFO: Fetching instances for instane='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # Record the failure and carry on with the next row.
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")