]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Continued:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import sys
21 import time
22
23 import argparse
24 import atoma
25 import bs4
26 import markdown
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import config
32 from fba import federation
33 from fba import fba
34 from fba import instances
35 from fba import locking
36 from fba import network
37
38 from fba.helpers import tidyup
39
40 from fba.networks import friendica
41 from fba.networks import mastodon
42 from fba.networks import misskey
43 from fba.networks import pleroma
44
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain could be added as a new instance.

    Exactly one line is printed describing the outcome.

    Returns:
        100 if validators.domain() rejects args.domain,
        101 if blacklist.is_blacklisted() reports it,
        102 if instances.is_registered() reports it,
        0 otherwise (the domain is not known yet).
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")

    # The checks run in this fixed order; the first one that matches wins.
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
62
def fetch_bkali(args: argparse.Namespace):
    """Fetch the domain list from gql.api.bka.li and crawl the unknown ones.

    A GraphQL query is posted to gql.api.bka.li. Every returned entry whose
    domain is valid, not blacklisted, not yet registered and not already
    collected in this run is remembered. If at least one domain was
    collected, locking.acquire() is called and federation.fetch_instances()
    is invoked once per collected domain.

    Any regular exception raised while fetching or inspecting the response
    is reported on stdout and terminates the process with exit code 255.
    KeyboardInterrupt and SystemExit are deliberately NOT intercepted, so
    Ctrl+C is not reported as a failed fetch.

    Args:
        args: Parsed command line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        elif "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif entry["domain"] in domains:
                # Same de-duplication the RSS/ATOM fetchers in this module perform
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except Exception as exception:
        # Only "regular" errors end up here; BaseException would also have
        # swallowed KeyboardInterrupt/SystemExit.
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
111
def _deobfuscate_blocked(blocked: str, wildcard: str):
    """Resolve an obscured blocked entry to a domain from the instances table.

    Every occurrence of wildcard in blocked is replaced by '_', the SQL LIKE
    placeholder for a single character, and the first match (ordered by
    rowid) is used.

    Args:
        blocked: Obscured domain as reported by the blocker.
        wildcard: Character used for obscuring, '*' or '?'.

    Returns:
        The matching domain, or None when no row matches.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )

    searchres = fba.cursor.fetchone()
    return None if searchres is None else searchres[0]

def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from one instance or from all instances that are due.

    When args.domain is set (not None and not empty) it must be valid, not
    blacklisted and already registered, otherwise a warning is printed and
    the command returns before locking.acquire() is called. With a domain
    only that instance is selected; without one, every instance of a
    supported software whose last_blocked is NULL or older than the
    "recheck_block" config value is selected.

    For pleroma and mastodon the work is delegated to the respective
    network module. For friendica and misskey the returned mapping of
    block level -> list of blocks is processed here: obscured domains are
    resolved against the instances table, unknown blocked domains are
    added, and each block is either inserted or has its last-seen
    timestamp and reason refreshed. Any other software only yields a
    warning.

    New "reject" blocks collected for a blocker are handed to
    network.send_bot_post() when the "bot_enabled" config value is truthy.

    Args:
        args: Parsed command line arguments; only args.domain is read.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Fresh per blocker, so nothing has to be reset at the end of the loop
        blockdict = list()
        blocker = tidyup.domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            # Own name for the result instead of re-using (shadowing) 'rows'
            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            else:
                blocking = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                block_level = tidyup.domain(block_level)
                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                for block in blocklist:
                    # NOTE(review): relies on each block dict holding exactly
                    # two values in the order (blocked, reason) - confirm
                    # against friendica/misskey.fetch_blocks().
                    blocked, reason = block.values()
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                        continue

                    # Some servers obscure domains with '*', others with '?'.
                    # '*' takes precedence, and only that character is
                    # treated as a placeholder.
                    wildcard = None
                    if blocked.count("*") > 0:
                        wildcard = "*"
                    elif blocked.count("?") > 0:
                        wildcard = "?"

                    if wildcard is not None:
                        resolved = _deobfuscate_blocked(blocked, wildcard)

                        if resolved is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                            continue

                        # Only the domain is taken over. The blocker's own
                        # origin/nodeinfo_url must stay untouched, otherwise
                        # the values of an unrelated instance would leak into
                        # instances.add() for later entries of this blocker.
                        blocked = resolved

                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                    if not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                        continue
                    elif not instances.is_registered(blocked):
                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                        instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        # Only newly added "reject" blocks are reported to the bot
                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

            # DEBUG: print("DEBUG: Committing changes ...")
            fba.connection.commit()
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
259
def fetch_cs(args: argparse.Namespace):
    """Import the block list published by chaos.social.

    The federation.md document from the chaossocial/meta repository is
    downloaded, rendered from Markdown to HTML and the tables following the
    "silenced-instances" and "blocked-instances" headings are handed to
    federation.find_domains(). Rows from the first table are stored with
    block level "silenced", rows from the second with "reject".

    For every row a block by 'chaos.social' is added unless it already
    exists, and federation.fetch_instances() is invoked for domains that
    are not registered yet. Nothing is locked or committed when both tables
    yield no rows.

    Args:
        args: Parsed command line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    extensions = [
        'extra',
        'abbr',
        'attr_list',
        'def_list',
        'fenced_code',
        'footnotes',
        'md_in_html',
        'admonition',
        'codehilite',
        'legacy_attrs',
        'legacy_em',
        'meta',
        'nl2br',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks'
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    # DEBUG: print(f"DEBUG: raw()={len(raw)}[]={type(raw)}")

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
    domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
    domains["reject"] = domains["reject"] + federation.find_domains(blocked)

    # Count the parsed rows, not the dict keys: len(domains) is always 2
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
323
def fetch_fba_rss(args: argparse.Namespace):
    """Collect new domains from an FBA-specific RSS feed and crawl them.

    The feed at args.feed is downloaded and parsed. The domain of each item
    is the text between the first and second '=' of the item's link. Items
    without a link, or whose link contains no '=', are skipped with a
    warning instead of aborting the whole run. Blacklisted, duplicate and
    already registered domains are skipped silently.

    If at least one domain remains, locking.acquire() is called and
    federation.fetch_instances() is invoked once per domain.

    Args:
        args: Parsed command line arguments; only args.feed is read.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
        rss = atoma.parse_rss_bytes(response.content)

        # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
        for item in rss.items:
            # DEBUG: print(f"DEBUG: item={item}")
            # A missing link or one without '=' carries no domain; indexing
            # [1] blindly would raise and abort the whole import.
            parts = item.link.split("=") if item.link else []
            if len(parts) < 2:
                print(f"WARNING: item.link='{item.link}' does not contain a domain - SKIPPED!")
                continue

            domain = parts[1]

            if blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif domain in domains:
                # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
            domains.append(domain)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
364
def fetch_fbabot_atom(args: argparse.Namespace):
    """Collect new domains from the FBA bot's ATOM feed and crawl them.

    The feed of the bot account is downloaded and parsed. Every anchor found
    in an entry's content has its href split at ',' and each part is passed
    through tidyup.domain(). Domains that are blacklisted, already collected
    or already registered are skipped.

    If at least one domain remains, locking.acquire() is called and
    federation.fetch_instances() is invoked once per domain.

    Args:
        args: Parsed command line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"
    found = []

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(
        feed,
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    usable = response.ok and response.status_code < 300 and len(response.text) > 0
    if not usable:
        # Nothing was fetched, so there is nothing to add either
        # DEBUG: print("DEBUG: EXIT!")
        return

    # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
    atom = atoma.parse_atom_bytes(response.content)

    # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
    for entry in atom.entries:
        # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
        doc = bs4.BeautifulSoup(entry.content.value, "html.parser")

        # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
        for anchor in doc.findAll("a"):
            for href in anchor["href"].split(","):
                # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                domain = tidyup.domain(href)

                # Checked in this order: blacklisted, duplicate, registered
                # DEBUG: print(f"DEBUG: domain='{domain}'")
                if blacklist.is_blacklisted(domain) or domain in found or instances.is_registered(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' - SKIPPED!")
                    continue

                # DEBUG: print(f"DEBUG: Adding domain='{domain}',found()={len(found)}")
                found.append(domain)

    # DEBUG: print(f"DEBUG: found({len(found)})={found}")
    if not found:
        # DEBUG: print("DEBUG: EXIT!")
        return

    locking.acquire()

    print(f"INFO: Adding {len(found)} new instances ...")
    for domain in found:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
413
def fetch_instances(args: argparse.Namespace):
    """Crawl args.domain and then every known instance that is due.

    After locking.acquire(), federation.fetch_instances() is invoked for
    args.domain. Unless args.single is truthy, all instances of a supported
    software whose last_instance_fetch is NULL or older than the
    "recheck_instance" config value are selected (newest rowid first) and
    crawled as well; blacklisted ones are skipped with a warning.

    Args:
        args: Parsed command line arguments; args.domain and args.single
            are read.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    locking.acquire()

    # Name of this command, handed to every federation.fetch_instances() call
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    federation.fetch_instances(args.domain, None, None, command)

    if args.single:
        # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
        return

    # Loop through some instances
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        # DEBUG: print(f"DEBUG: domain='{domain}'")
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        federation.fetch_instances(domain, origin, software, command, nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")
442
def fetch_federater(args: argparse.Namespace):
    """Crawl the domains listed in federater's recommended block list.

    After locking.acquire(), federater.csv is downloaded from GitHub and
    parsed as CSV with a header row. The '#domain' column of every row is
    checked: invalid and blacklisted domains are skipped with a warning,
    already registered ones silently. federation.fetch_instances() is
    invoked for each remaining domain.

    Args:
        args: Parsed command line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    locking.acquire()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
    # response.content is decoded below, so it is bytes; comparing it with
    # the str "" is always unequal, hence the length check.
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        ## DEBUG: print(f"DEBUG: response.content={response.content}")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
        for row in reader:
            if not validators.domain(row["#domain"]):
                print(f"WARNING: domain='{row['#domain']}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(row["#domain"]):
                print(f"WARNING: domain='{row['#domain']}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(row["#domain"]):
                # DEBUG: print(f"DEBUG: domain='{row['#domain']}' is already registered - skipped!")
                continue

            print(f"INFO: Fetching instances for instane='{row['#domain']}' ...")
            federation.fetch_instances(row["#domain"], None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
470     # DEBUG: print("DEBUG: EXIT!")