]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Continued:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import sys
21 import time
22
23 import argparse
24 import atoma
25 import bs4
26 import markdown
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import boot
32 from fba import config
33 from fba import fba
34 from fba import instances
35 from fba import network
36
37 from fba.federation import *
38
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain could be added as a new instance.

    Prints one line describing the outcome and returns a status code:
    0 when the domain is not known yet, 100 when it is not a valid
    domain name, 101 when it is blacklisted and 102 when it is already
    registered. The checks run in exactly that order and the first one
    that matches decides the result.
    """
    domain = args.domain

    if not validators.domain(domain):
        print(f"WARNING: args.domain='{domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(domain):
        print(f"WARNING: args.domain='{domain}' is blacklisted")
        return 101

    if instances.is_registered(domain):
        print(f"WARNING: args.domain='{domain}' is already registered")
        return 102

    # None of the above matched, so the domain is new to us
    print(f"INFO: args.domain='{domain}' is not known")
    return 0
56
def fetch_bkali(args: argparse.Namespace):
    """Fetch the domain list from gql.api.bka.li and crawl every new domain.

    The GraphQL response must contain data -> nodeinfo; each record's
    'domain' is kept only if it is a valid domain name, not blacklisted
    and not yet registered. Any exception while fetching or filtering is
    printed and terminates the process with exit code 255. When at least
    one new domain remains, the lock is acquired and fba.fetch_instances()
    is invoked for each of them.
    """
    new_domains = []

    try:
        payload = json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        })
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", payload)

        # Validate the shape of the response before walking it
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        if "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        if "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for record in fetched["data"]["nodeinfo"]:
            if "domain" not in record:
                print(f"WARNING: entry()={len(record)} does not contain 'domain' - SKIPPED!")
                continue

            candidate = record["domain"]
            if not validators.domain(candidate):
                print(f"WARNING: domain='{candidate}' is not a valid domain - SKIPPED!")
                continue

            # Blacklisted or already known domains are skipped silently
            if blacklist.is_blacklisted(candidate) or instances.is_registered(candidate):
                continue

            new_domains.append(candidate)

    except BaseException as error:
        print(f"ERROR: Cannot fetch graphql,exception[{type(error)}]:'{str(error)}'")
        sys.exit(255)

    if not new_domains:
        return

    boot.acquire_lock()

    # Name of this command, handed to fba.fetch_instances() with each domain
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(new_domains)} new instances ...")
    for candidate in new_domains:
        print(f"INFO: Fetching instances from domain='{candidate}' ...")
        fba.fetch_instances(candidate, None, None, command)
105
def _lookup_obfuscated_domain(blocked: str):
    """Look up an obfuscated domain in the instances table.

    Every '*' (or, when the name contains no '*', every '?') is turned
    into the SQL LIKE single-character wildcard '_'. Returns the first
    matching row as (domain, origin, nodeinfo_url) or None when nothing
    matches.
    """
    # '*' takes precedence over '?' when both occur in the same name
    wildcard = "*" if "*" in blocked else "?"

    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )

    return fba.cursor.fetchone()

def _process_block_entry(blocker: str, software: str, block_level: str, block: dict, nodeinfo_url, command: str):
    """Record one block entry published by blocker.

    Tidies up the blocked domain and the reason, de-obfuscates the domain
    if it contains '*' or '?', registers the blocked instance when it is
    not registered yet and then either adds the block or refreshes its
    last-seen timestamp and reason.

    Returns a {"blocked", "reason"} dict when a new block with level
    'reject' was added (the caller collects these for the bot post),
    otherwise None.
    """
    # Relies on each block dict holding exactly two values in this order
    blocked, reason = block.values()
    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
    blocked = fba.tidyup_domain(blocked)
    reason  = fba.tidyup_reason(reason) if reason is not None and reason != "" else None
    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

    if blocked == "":
        print("WARNING: blocked is empty:", blocker)
        return None
    elif blacklist.is_blacklisted(blocked):
        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
        return None
    elif "*" in blocked or "?" in blocked:
        # Some servers obscure domains with '*' or '?' instead of a hash
        searchres = _lookup_obfuscated_domain(blocked)

        if searchres is None:
            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
            return None

        # Only rebinds this function's locals, so the values of the found
        # instance can no longer leak into the following block entries.
        blocked = searchres[0]
        nodeinfo_url = searchres[2]

    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
    if not validators.domain(blocked):
        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
        return None
    elif not instances.is_registered(blocked):
        # NOTE(review): for non-obfuscated names nodeinfo_url is still the
        # blocker's URL, as before - confirm that instances.add() expects that.
        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
        instances.add(blocked, blocker, command, nodeinfo_url)

    if not blocks.is_instance_blocked(blocker, blocked, block_level):
        blocks.add_instance(blocker, blocked, reason, block_level)

        if block_level == "reject":
            return {
                "blocked": blocked,
                "reason" : reason
            }
    else:
        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
        blocks.update_last_seen(blocker, blocked, block_level)
        blocks.update_reason(reason, blocker, blocked, block_level)

    return None

def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and store them.

    With a non-empty args.domain only that instance is re-checked; it must
    be a valid, non-blacklisted and already registered domain, otherwise a
    warning is printed and nothing happens. Without it, all instances whose
    last_blocked is NULL or older than the configured 'recheck_block'
    interval are processed.

    pleroma and mastodon are delegated to their federation modules.
    friendica and misskey block lists are processed here, entry by entry,
    and committed once per blocker. Errors while handling one
    friendica/misskey blocker are printed and the loop continues with the
    next blocker. When 'bot_enabled' is configured, newly added 'reject'
    blocks are sent through network.send_bot_post().

    NOTE(review): both queries also select 'bookwyrm' and 'takahe', but no
    branch below handles them, so they end up as "Unknown software".
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    # Command name stored with newly added instances; resolved here because
    # the helper runs in its own frame.
    command = inspect.currentframe().f_code.co_name

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        blockdict = []
        blocker = fba.tidyup_domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software in ("friendica", "misskey"):
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                # Own name on purpose: the outer 'rows' is still being iterated
                if software == "friendica":
                    block_levels = friendica.fetch_blocks(blocker)
                else:
                    block_levels = misskey.fetch_blocks(blocker)

                print(f"INFO: Checking {len(block_levels.items())} entries from blocker='{blocker}',software='{software}' ...")
                for block_level, blocklist in block_levels.items():
                    # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                    block_level = fba.tidyup_domain(block_level)
                    # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                    if block_level == "":
                        print("WARNING: block_level is empty, blocker:", blocker)
                        continue

                    # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                    for block in blocklist:
                        added = _process_block_entry(blocker, software, block_level, block, nodeinfo_url, command)
                        if added is not None:
                            blockdict.append(added)

                # DEBUG: print("DEBUG: Committing changes ...")
                fba.connection.commit()
            except Exception as exception:
                # Exception instead of BaseException: a failing blocker is
                # still only logged, but Ctrl-C (KeyboardInterrupt) and
                # SystemExit now really stop the run.
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(exception)}]:'{str(exception)}'")
        else:
            print("WARNING: Unknown software:", blocker, software)

        # Entries collected before an error are still announced
        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
256
def fetch_cs(args: argparse.Namespace):
    """Import the block list published by chaos.social.

    Downloads federation.md from the chaossocial/meta repository, renders
    it to HTML and reads the tables following the 'silenced-instances' and
    'blocked-instances' headings through fba.find_domains(). Every row is
    recorded as a block by 'chaos.social' (level 'silenced' or 'reject')
    unless it is already recorded, and instances that are not registered
    yet are crawled with fba.fetch_instances().

    Any exception while downloading or parsing is printed and terminates
    the process with exit code 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    extensions = [
        'extra',
        'abbr',
        'attr_list',
        'def_list',
        'fenced_code',
        'footnotes',
        'md_in_html',
        'admonition',
        'codehilite',
        'legacy_attrs',
        'legacy_em',
        'meta',
        'nl2br',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks'
    ]

    # Rows found in the document, keyed by block level
    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    try:
        raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
        # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")

        doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

        # A missing heading or table raises AttributeError on None here,
        # which is reported by the handler below.
        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
        domains["silenced"] = domains["silenced"] + fba.find_domains(silenced)

        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
        domains["reject"] = domains["reject"] + fba.find_domains(blocked)

    except BaseException as exception:
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # Count the rows, not the dict: the dict always has two keys, which made
    # the old len(domains) check always true and the message always say 2.
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
325
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl new domains listed in the FBA-specific RSS feed at args.feed.

    The domain of each feed item is whatever follows the first '=' in the
    item's link. Blacklisted, repeated and already registered domains are
    skipped. Any exception while fetching or parsing is printed and
    terminates the process with exit code 255. When new domains remain,
    the lock is acquired and fba.fetch_instances() is invoked for each.
    """
    found = []

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # Only a successful, non-empty response gets parsed
        usable = response.ok and response.status_code < 300 and len(response.text) > 0
        feed_items = atoma.parse_rss_bytes(response.content).items if usable else []

        for item in feed_items:
            candidate = item.link.split("=")[1]

            if (
                blacklist.is_blacklisted(candidate)
                or candidate in found
                or instances.is_registered(candidate)
            ):
                continue

            found.append(candidate)

    except BaseException as error:
        print(f"ERROR: Cannot fetch args.feed='{args.feed}',exception[{type(error)}]:'{str(error)}'")
        sys.exit(255)

    if not found:
        return

    boot.acquire_lock()

    # Name of this command, handed to fba.fetch_instances() with each domain
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(found)} new instances ...")
    for candidate in found:
        print(f"INFO: Fetching instances from domain='{candidate}' ...")
        fba.fetch_instances(candidate, None, None, command)
371
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl new domains mentioned in the FBA bot account's ATOM feed.

    Every <a> element in each feed entry's content is inspected; its href
    is split on ',' and each part is passed through fba.tidyup_domain().
    Blacklisted, repeated and already registered domains are skipped. Any
    exception while fetching or parsing is printed and terminates the
    process with exit code 255. When new domains remain, the lock is
    acquired and fba.fetch_instances() is invoked for each of them.
    """
    feed = "https://ryona.agency/users/fba/feed.atom"
    found = []

    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # Only a successful, non-empty response gets parsed
        usable = response.ok and response.status_code < 300 and len(response.text) > 0
        entries = atoma.parse_atom_bytes(response.content).entries if usable else []

        for entry in entries:
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")

            for anchor in doc.findAll("a"):
                for href in anchor["href"].split(","):
                    candidate = fba.tidyup_domain(href)

                    if (
                        blacklist.is_blacklisted(candidate)
                        or candidate in found
                        or instances.is_registered(candidate)
                    ):
                        continue

                    found.append(candidate)

    except BaseException as error:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(error)}]:'{str(error)}'")
        sys.exit(255)

    if not found:
        return

    boot.acquire_lock()

    # Name of this command, handed to fba.fetch_instances() with each domain
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(found)} new instances ...")
    for candidate in found:
        print(f"INFO: Fetching instances from domain='{candidate}' ...")
        fba.fetch_instances(candidate, None, None, command)
424
def fetch_instances(args: argparse.Namespace):
    """Crawl args.domain and, unless args.single is set, known instances too.

    After the initial fetch of args.domain, all instances of the listed
    software types whose last_instance_fetch is NULL or older than the
    configured 'recheck_instance' interval are fetched again, newest row
    first. Blacklisted domains are skipped with a warning.
    """
    boot.acquire_lock()

    # Name of this command, handed to every fba.fetch_instances() call
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        # Caller asked for this one domain only
        return

    # Loop through some instances
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    candidates = fba.cursor.fetchall()
    print(f"INFO: Checking {len(candidates)} entries ...")
    for domain, origin, software, nodeinfo_url in candidates:
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)
453
def fetch_federater(args: argparse.Namespace):
    """Crawl the domains listed in federater's recommended block list.

    Downloads federater.csv from the federater/blocks_recommended
    repository, parses it as CSV with a header line and calls
    fba.fetch_instances() for every '#domain' value that is a valid domain
    name, not blacklisted and not registered yet. Nothing is parsed when
    the request failed or the body is empty.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")

    # response.content is bytes (it is decoded below), so the former
    # comparison with the str "" was always true; check the length instead.
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
        for row in reader:
            if "#domain" not in row:
                # Header without a '#domain' column: skip instead of
                # aborting with an unhandled KeyError
                print(f"WARNING: row()={len(row)} does not contain '#domain' - skipped!")
                continue

            domain = row["#domain"]
            if not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - skipped!")
                continue

            print(f"INFO: Fetching instances for instane='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")