]> git.mxchange.org Git - fba.git/blob - fba/commands.py
WIP:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import sys
21 import time
22
23 import argparse
24 import atoma
25 import bs4
26 import markdown
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import boot
32 from fba import config
33 from fba import fba
34 from fba import instances
35 from fba import network
36
37 from fba.federation import *
38
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain is a candidate for a new instance.

    Exactly one line is printed describing the outcome. The checks run in
    this order and the first one that matches decides the result:

    * 100 - validators.domain() rejects the name
    * 101 - the domain is blacklisted
    * 102 - the domain is already registered
    *   0 - none of the above, the domain is not known yet

    Returns the status code listed above.
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    domain = args.domain

    if not validators.domain(domain):
        message, status = f"WARNING: args.domain='{domain}' is not valid", 100
    elif blacklist.is_blacklisted(domain):
        message, status = f"WARNING: args.domain='{domain}' is blacklisted", 101
    elif instances.is_registered(domain):
        message, status = f"WARNING: args.domain='{domain}' is already registered", 102
    else:
        message, status = f"INFO: args.domain='{domain}' is not known", 0

    print(message)

    # DEBUG: print(f"DEBUG: status={status} - EXIT!")
    return status
56
def fetch_bkali(args: argparse.Namespace):
    """Fetch the domain list from gql.api.bka.li and crawl unknown domains.

    A GraphQL query for all nodeinfo domains is POSTed to the API. Entries
    without a 'domain' key, with an invalid domain, or whose domain is
    blacklisted, already registered or already collected in this run are
    skipped. For every remaining domain fba.fetch_instances() is invoked
    after the lock has been acquired.

    args is not read by this command.

    Any error while fetching or evaluating the response is printed and the
    process exits with status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        elif not "data" in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        elif not "nodeinfo" in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if not "domain" in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif entry["domain"] in domains:
                # Same duplicate guard as the RSS/ATOM commands use, so no
                # domain is crawled twice within a single run.
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except Exception as exception:
        # Only "real" errors are handled here: KeyboardInterrupt and
        # SystemExit derive from BaseException and must not be reported as
        # a failed GraphQL request.
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if domains:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
105
def _lookup_obfuscated_instance(blocked: str, wildcard: str):
    """Return the first instances row matching an obfuscated domain, or None.

    Every occurrence of wildcard in blocked is replaced by '_', the SQL LIKE
    placeholder for exactly one character. The returned row consists of
    (domain, origin, nodeinfo_url).
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )

    return fba.cursor.fetchone()

def fetch_blocks(args: argparse.Namespace):
    """Re-check the block lists of one or all due instances.

    When args.domain is set (neither None nor empty) only that domain is
    checked; it must be valid, not blacklisted and already registered,
    otherwise a warning is printed and nothing is done. Without a domain
    all instances of the supported software types are selected whose
    last_blocked is NULL or older than the 'recheck_block' config value.

    Pleroma and Mastodon blockers are handed over to their federation
    modules. Friendica and Misskey block lists are processed here: each
    blocked domain is tidied up, de-obfuscated through the instances table
    when it contains '*' or '?', registered when unknown and then added or
    refreshed in the blocks table. Newly found 'reject' blocks are collected
    and posted via network.send_bot_post() when 'bot_enabled' is set.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Newly found "reject" blocks of this blocker, announced by the bot
        blockdict = list()
        blocker = fba.tidyup_domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            # Kept in its own name so the outer result set is not shadowed
            if software == "friendica":
                block_levels = friendica.fetch_blocks(blocker)
            else:
                block_levels = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(block_levels)} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in block_levels.items():
                # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                block_level = fba.tidyup_domain(block_level)
                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                for block in blocklist:
                    # NOTE(review): relies on each block dict holding exactly
                    # two values in the order (blocked, reason) - confirm
                    # against friendica/misskey.fetch_blocks().
                    blocked, reason = block.values()
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                    blocked = fba.tidyup_domain(blocked)
                    reason  = fba.tidyup_reason(reason) if reason is not None and reason != "" else None
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                    # Per-block copy: a de-obfuscated row must not overwrite
                    # the blocker's own nodeinfo_url, otherwise it leaks into
                    # every later block of this blocker.
                    blocked_nodeinfo_url = nodeinfo_url

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                        continue
                    elif "*" in blocked or "?" in blocked:
                        # Some servers obscure domains with '*', others with
                        # '?'. As before, '*' takes precedence and only that
                        # one character is treated as the placeholder.
                        wildcard = "*" if "*" in blocked else "?"
                        searchres = _lookup_obfuscated_instance(blocked, wildcard)

                        if searchres is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                            continue

                        blocked = searchres[0]
                        blocked_nodeinfo_url = searchres[2]

                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                    if not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                        continue
                    elif not instances.is_registered(blocked):
                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                        instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

            # DEBUG: print("DEBUG: Committing changes ...")
            fba.connection.commit()
        else:
            # The query also selects 'bookwyrm' and 'takahe', which have no
            # handler here and therefore end up in this branch.
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
253
def fetch_cs(args: argparse.Namespace):
    """Import the silenced and blocked instances published by chaos.social.

    The federation.md document of the chaossocial/meta repository is
    downloaded, rendered from Markdown to HTML and the tables following the
    'silenced-instances' and 'blocked-instances' headings are handed to
    fba.find_domains(). Every row found is recorded as a block of
    'chaos.social' (level 'silenced' or 'reject') unless it is already
    known, and fba.fetch_instances() is invoked for domains that are not
    registered yet.

    args is not read by this command.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    extensions = [
        'extra',
        'abbr',
        'attr_list',
        'def_list',
        'fenced_code',
        'footnotes',
        'md_in_html',
        'admonition',
        'codehilite',
        'legacy_attrs',
        'legacy_em',
        'meta',
        'nl2br',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks'
    ]

    # Block level -> id of the heading that precedes the matching table
    headings = {
        "silenced": "silenced-instances",
        "reject"  : "blocked-instances",
    }

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
    for block_level, heading_id in headings.items():
        # Each lookup may return None when the document layout changes, so
        # walk down step by step instead of chaining the calls.
        heading = doc.find("h2", {"id": heading_id})
        table = heading.find_next("table") if heading is not None else None
        tbody = table.find("tbody") if table is not None else None

        # DEBUG: print(f"DEBUG: block_level='{block_level}',tbody[]={type(tbody)}")
        if tbody is None:
            print(f"WARNING: Cannot find table for heading_id='{heading_id}' - SKIPPED!")
            continue

        domains[block_level] = domains[block_level] + fba.find_domains(tbody)

    # Count the rows, not the two dict keys, to decide if there is work
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
317
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl the domains announced in an FBA-specific RSS feed.

    The feed at args.feed is downloaded and parsed. The domain of each item
    is taken from its link: the text between the first and the second '='
    (or the end of the link). Items without a usable link as well as
    blacklisted, duplicate or already registered domains are skipped. For
    every remaining domain fba.fetch_instances() is invoked after the lock
    has been acquired.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
        rss = atoma.parse_rss_bytes(response.content)

        # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
        for item in rss.items:
            # DEBUG: print(f"DEBUG: item={item}")
            # An item may come without a link, or with one that carries no
            # '=' at all; neither must abort the whole run.
            parts = item.link.split("=") if item.link else []
            if len(parts) < 2 or parts[1] == "":
                print(f"WARNING: item.link='{item.link}' does not contain a domain - SKIPPED!")
                continue

            domain = parts[1]

            if blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif domain in domains:
                # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
            domains.append(domain)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if domains:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
358
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl the domains linked in the ATOM feed of the FBA bot account.

    The feed is downloaded and parsed; the content of each entry is treated
    as HTML. Every 'href' of an anchor is split at ',' and each part is run
    through fba.tidyup_domain(). Empty, blacklisted, duplicate or already
    registered domains are skipped. For every remaining domain
    fba.fetch_instances() is invoked after the lock has been acquired.

    args is not read by this command.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
        atom = atoma.parse_atom_bytes(response.content)

        # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
        for entry in atom.entries:
            # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
            # Entries are not guaranteed to carry content
            if entry.content is None or not entry.content.value:
                # DEBUG: print("DEBUG: entry has no content - SKIPPED!")
                continue

            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
            # href=True leaves out anchors without that attribute, which
            # would otherwise raise a KeyError below.
            for element in doc.find_all("a", href=True):
                for href in element["href"].split(","):
                    # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                    domain = fba.tidyup_domain(href)

                    # DEBUG: print(f"DEBUG: domain='{domain}'")
                    if domain == "":
                        # DEBUG: print(f"DEBUG: href='{href}' results in an empty domain - SKIPPED!")
                        continue
                    elif blacklist.is_blacklisted(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                        continue
                    elif domain in domains:
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                        continue
                    elif instances.is_registered(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                        continue

                    # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                    domains.append(domain)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if domains:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
407
def fetch_instances(args: argparse.Namespace):
    """Crawl args.domain and, unless args.single is set, all due instances.

    After the lock has been acquired, fba.fetch_instances() is invoked for
    args.domain. When args.single is false, every instance of a supported
    software type whose last_instance_fetch is NULL or older than the
    'recheck_instance' config value is crawled as well, newest rows first.
    Blacklisted domains are reported and skipped.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    # Name of this command, handed on to fba.fetch_instances()
    command = inspect.currentframe().f_code.co_name

    boot.acquire_lock()

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
        return

    # Everything fetched before this point in time is due again
    threshold = time.time() - config.get("recheck_instance")

    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    candidates = fba.cursor.fetchall()
    print(f"INFO: Checking {len(candidates)} entries ...")
    for domain, origin, software, nodeinfo_url in candidates:
        # DEBUG: print(f"DEBUG: domain='{domain}'")
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")
436
def fetch_federater(args: argparse.Namespace):
    """Crawl the domains listed in the 'federater' recommended block list.

    After the lock has been acquired, federater.csv is downloaded and
    parsed as CSV with a header row. The domain is read from the '#domain'
    column. Rows without a domain as well as invalid, blacklisted or already
    registered domains are skipped; for every other domain
    fba.fetch_instances() is invoked.

    args is not read by this command.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
    # response.content is bytes (it is decoded below), so it has to be
    # checked by length; comparing it with the str "" is always unequal.
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        ## DEBUG: print(f"DEBUG: response.content={response.content}")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
        for row in reader:
            # A changed header or a short row must not abort the import
            domain = row.get("#domain")

            if not domain:
                print(f"WARNING: row='{row}' does not contain a '#domain' value - skipped!")
                continue
            elif not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - skipped!")
                continue

            print(f"INFO: Fetching instances for instane='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")