]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Continued:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import argparse
18 import atoma
19 import bs4
20 import csv
21 import inspect
22 import itertools
23 import json
24 import markdown
25 import re
26 import reqto
27 import sys
28 import time
29 import validators
30
31 from fba import blacklist
32 from fba import blocks
33 from fba import boot
34 from fba import config
35 from fba import fba
36 from fba import instances
37 from fba import network
38
39 from fba.federation import *
40
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain is a candidate for a new instance.

    One line describing the outcome is printed and a status code returned:

    * 100 - args.domain is not a valid domain name
    * 101 - args.domain is blacklisted
    * 102 - args.domain is already registered
    * 0   - none of the above, the domain is not known yet
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")

    # The checks are ordered: the first one that matches decides the status.
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
58
def fetch_bkali(args: argparse.Namespace):
    """Query the gql.api.bka.li GraphQL API for domains and fetch new ones.

    The response must contain data -> nodeinfo, a sequence of entries with
    a 'domain' key. Entries without that key or with an invalid domain are
    skipped with a warning; blacklisted and already registered domains are
    skipped silently. Any error while querying or filtering is printed and
    the process exits with status 255.

    For every remaining domain fba.fetch_instances() is invoked after the
    lock has been acquired.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    candidates = []

    try:
        payload = json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        })
        result = network.post_json_api("gql.api.bka.li", "/v1/graphql", payload)

        # Validate the shape of the response before walking it
        if len(result) == 0:
            raise Exception("WARNING: Returned no records")
        if "data" not in result:
            raise Exception(f"WARNING: fetched()={len(result)} does not contain key 'data'")
        if "nodeinfo" not in result["data"]:
            raise Exception(f"WARNING: fetched()={len(result['data'])} does not contain key 'nodeinfo'")

        for record in result["data"]["nodeinfo"]:
            if "domain" not in record:
                print(f"WARNING: entry()={len(record)} does not contain 'domain' - SKIPPED!")
                continue

            if not validators.domain(record["domain"]):
                print(f"WARNING: domain='{record['domain']}' is not a valid domain - SKIPPED!")
                continue

            # Blacklisted or already known domains need no further action
            if blacklist.is_blacklisted(record["domain"]) or instances.is_registered(record["domain"]):
                continue

            candidates.append(record["domain"])

    except BaseException as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(candidates)}")
    if not candidates:
        return

    boot.acquire_lock()

    print(f"INFO: Adding {len(candidates)} new instances ...")
    for domain in candidates:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
107
def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and store them.

    When args.domain is set (neither None nor empty) only that domain is
    re-checked. It must be a valid domain name, not blacklisted and already
    registered; otherwise a warning is printed and the function returns
    without acquiring the lock. Without args.domain, all instances of the
    selected software types whose last_blocked is NULL or older than the
    "recheck_block" configuration value are processed.

    Pleroma and Mastodon blockers are delegated to their federation modules.
    For Friendica and Misskey the fetched mapping (block level -> list of
    entries holding a blocked domain and a reason) is processed here:
    obfuscated domains are looked up in the instances table, unknown
    domains are added, and blocks are inserted or refreshed. Newly added
    "reject" blocks are collected and sent through the bot when
    "bot_enabled" is configured.

    Errors while processing one Friendica/Misskey blocker are printed and
    the loop continues with the next blocker.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Newly found "reject" blocks of this blocker, reported to the bot
        blockdict = []
        blocker = fba.tidyup_domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software in ("friendica", "misskey"):
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                # Named block_data (not "json") so the json module is not shadowed
                if software == "friendica":
                    block_data = friendica.fetch_blocks(blocker)
                else:
                    block_data = misskey.fetch_blocks(blocker)

                print(f"INFO: Checking {len(block_data.items())} entries from blocker='{blocker}',software='{software}' ...")
                for block_level, blocklist in block_data.items():
                    # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                    block_level = fba.tidyup_domain(block_level)
                    # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                    if block_level == "":
                        print("WARNING: block_level is empty, blocker:", blocker)
                        continue

                    # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                    for block in blocklist:
                        # Each entry must hold exactly two values, in this order
                        blocked, reason = block.values()
                        # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                        blocked = fba.tidyup_domain(blocked)
                        reason = fba.tidyup_reason(reason) if reason is not None and reason != "" else None
                        # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                        if blocked == "":
                            print("WARNING: blocked is empty:", blocker)
                            continue
                        elif blacklist.is_blacklisted(blocked):
                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                            continue

                        # Some friendica servers obscure domains with "*"
                        # (without hash), others use "?". "*" takes
                        # precedence when both characters are present.
                        if "*" in blocked:
                            wildcard = "*"
                        elif "?" in blocked:
                            wildcard = "?"
                        else:
                            wildcard = None

                        if wildcard is not None:
                            # Each obscured character becomes LIKE's single-character wildcard
                            fba.cursor.execute(
                                "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
                            )

                            searchres = fba.cursor.fetchone()

                            if searchres is None:
                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                                continue

                            blocked = searchres[0]
                            # NOTE(review): nodeinfo_url stays overwritten for
                            # the remaining entries of this blocker, and before
                            # any de-obfuscation it is the blocker's own URL;
                            # both are passed to instances.add() below - confirm
                            # that this is intended.
                            nodeinfo_url = searchres[2]

                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        if not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue
                        elif not instances.is_registered(blocked):
                            # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)

                        if not blocks.is_instance_blocked(blocker, blocked, block_level):
                            blocks.add_instance(blocker, blocked, reason, block_level)

                            if block_level == "reject":
                                blockdict.append({
                                    "blocked": blocked,
                                    "reason" : reason
                                })
                        else:
                            # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                            blocks.update_last_seen(blocker, blocked, block_level)
                            blocks.update_reason(reason, blocker, blocked, block_level)

                # DEBUG: print("DEBUG: Committing changes ...")
                fba.connection.commit()
            except Exception as exception:
                # Only ordinary errors are reported and skipped; KeyboardInterrupt
                # and SystemExit must still be able to stop the whole run.
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(exception)}]:'{str(exception)}'")
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
258
def fetch_cs(args: argparse.Namespace):
    """Import the silenced and blocked instance lists published by chaos.social.

    The federation.md document is downloaded, rendered from Markdown to HTML
    and the tables following the "silenced-instances" and
    "blocked-instances" headings are handed to fba.find_domains(). Each row
    found (expected to provide "domain" and "reason") is stored as a block
    by 'chaos.social' with block level "silenced" or "reject", and domains
    that are not registered yet are fetched.

    Any error while downloading or parsing is printed and the process exits
    with status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    extensions = [
        'extra',
        'abbr',
        'attr_list',
        'def_list',
        'fenced_code',
        'footnotes',
        'md_in_html',
        'admonition',
        'codehilite',
        'legacy_attrs',
        'legacy_em',
        'meta',
        'nl2br',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks'
    ]

    # Rows found per block level
    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    try:
        raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
        # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")

        doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
        domains["silenced"] = domains["silenced"] + fba.find_domains(silenced)

        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
        domains["reject"] = domains["reject"] + fba.find_domains(blocked)

    except BaseException as exception:
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # Count the rows themselves: len(domains) is the number of block levels
    # and therefore always 2, regardless of what was found.
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
327
def fetch_fba_rss(args: argparse.Namespace):
    """Fetch the FBA-specific RSS feed at args.feed and add unknown domains.

    The domain of each feed item is taken from the text following the
    first "=" in the item's link. Blacklisted, duplicate and already
    registered domains are skipped. Any error while fetching or parsing is
    printed and the process exits with status 255.

    For every remaining domain fba.fetch_instances() is invoked after the
    lock has been acquired.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.fetch_url(args.feed, network.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
            rss = atoma.parse_rss_bytes(response.content)

            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
            for item in rss.items:
                # DEBUG: print(f"DEBUG: item={item}")
                domain = item.link.split("=")[1]

                if blacklist.is_blacklisted(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                    continue
                elif domain in domains:
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                    continue
                elif instances.is_registered(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                    continue

                # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
                domains.append(domain)

    except BaseException as exception:
        # The feed URL lives in args.feed; there is no local "feed" in this
        # function, so referencing it here would raise NameError instead of
        # reporting the real error.
        print(f"ERROR: Cannot fetch feed='{args.feed}',exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
373
def fetch_fbabot_atom(args: argparse.Namespace):
    """Collect unknown domains from the FBA bot's ATOM feed and fetch them.

    Every <a> element in the HTML content of each feed entry is inspected;
    its href is split on "," and each part is tidied into a domain.
    Blacklisted, duplicate and already registered domains are skipped. Any
    error while fetching or parsing is printed and the process exits with
    status 255.

    For every remaining domain fba.fetch_instances() is invoked after the
    lock has been acquired.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"
    found = []

    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = fba.fetch_url(feed, network.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # Only a successful, non-empty response is parsed
        usable = response.ok and response.status_code < 300 and len(response.text) > 0
        entries = atoma.parse_atom_bytes(response.content).entries if usable else []

        for entry in entries:
            html = bs4.BeautifulSoup(entry.content.value, "html.parser")
            for anchor in html.findAll("a"):
                for part in anchor["href"].split(","):
                    candidate = fba.tidyup_domain(part)

                    # Skip blacklisted, already collected or already registered domains
                    if (
                        blacklist.is_blacklisted(candidate)
                        or candidate in found
                        or instances.is_registered(candidate)
                    ):
                        continue

                    found.append(candidate)

    except BaseException as exception:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains({len(found)})={found}")
    if not found:
        return

    boot.acquire_lock()

    print(f"INFO: Adding {len(found)} new instances ...")
    for domain in found:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
426
def fetch_instances(args: argparse.Namespace):
    """Fetch instances starting at args.domain, then re-check due instances.

    After acquiring the lock, args.domain is fetched first. Unless
    args.single is set, all instances of the selected software types whose
    last_instance_fetch is NULL or older than the "recheck_instance"
    configuration value are fetched as well, newest rows first;
    blacklisted domains are skipped with a warning.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Name of this command, handed on to fba.fetch_instances()
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        # DEBUG: print(f"DEBUG: Not fetching more instances - EXIT!")
        return

    # Everything not fetched since this point in time is due for a re-check
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    due = fba.cursor.fetchall()
    print(f"INFO: Checking {len(due)} entries ...")
    for domain, origin, software, nodeinfo_url in due:
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)
455
def fetch_federater(args: argparse.Namespace):
    """Fetch the federater recommended-blocks CSV and add unknown domains.

    After acquiring the lock, the CSV file is downloaded and parsed with
    csv.DictReader. The domain of each row is read from the "#domain"
    column. Invalid and blacklisted domains are skipped with a warning,
    already registered ones silently; fba.fetch_instances() is invoked for
    every remaining domain.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")

    # response.content is bytes (it is decoded below), so a comparison with
    # the str "" would always be unequal; check its length instead.
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
        for row in reader:
            domain = row["#domain"]

            if not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - skipped!")
                continue

            print(f"INFO: Fetching instances for instane='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
483     # DEBUG: print("DEBUG: EXIT!")