]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Continued:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import sys
21 import time
22
23 import argparse
24 import atoma
25 import bs4
26 import markdown
27 import reqto
28 import requests
29 import validators
30
31 from fba import blacklist
32 from fba import blocks
33 from fba import config
34 from fba import federation
35 from fba import fba
36 from fba import instances
37 from fba import locking
38 from fba import network
39
40 from fba.helpers import tidyup
41
42 from fba.networks import friendica
43 from fba.networks import mastodon
44 from fba.networks import misskey
45 from fba.networks import pleroma
46
def check_instance(args: argparse.Namespace) -> int:
    """Report whether ``args.domain`` is a candidate for a new instance.

    Exactly one line is printed describing the outcome.

    Returns:
        0 if the domain is not known yet, 100 if it is not a valid domain
        name, 101 if it is blacklisted, 102 if it is already registered.
    """
    # The checks are ordered; the first one that matches decides the status.
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
64
def fetch_bkali(args: argparse.Namespace):
    """Discover new instances through the GraphQL API at gql.api.bka.li.

    The API is asked for its list of nodeinfo domains. Every entry that is a
    valid domain, not blacklisted, not yet registered and not already queued
    is then passed to ``federation.fetch_instances()``.

    Args:
        args: Parsed command-line arguments (not read by this command).

    Raises:
        Exception: If the API response is empty or lacks the ``data`` /
            ``nodeinfo`` keys.

    Exits the process with status 255 when the API request times out or the
    connection fails.
    """
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        elif "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif entry["domain"] in domains:
                # Already queued in this run; avoid crawling the same domain twice
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                continue
            elif instances.is_registered(entry["domain"]):
                continue

            domains.append(entry["domain"])

    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
                # A single unreachable instance must not abort the whole run
                print(f"WARNING: Timeout during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, ex)
117
def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and record them.

    When ``args.domain`` is set, only that instance is re-checked; it must be
    a valid, non-blacklisted and already registered domain, otherwise a
    warning is printed and nothing is done. Without ``args.domain`` every
    instance running one of the selected software types is processed when its
    ``last_blocked`` column is NULL or older than the ``recheck_block``
    configuration value.

    Pleroma and Mastodon instances are delegated to their network modules.
    For Friendica and Misskey the returned block lists are stored here and
    the changes committed per blocker. If ``bot_enabled`` is configured, newly
    added "reject" blocks are passed to ``network.send_bot_post()``.
    """
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    # Resolved here because the helpers below run in their own frame and
    # would otherwise report their own name instead of this command's.
    command = inspect.currentframe().f_code.co_name

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        blockdict = list()
        blocker = tidyup.domain(blocker)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")

            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            else:
                blocking = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(blocking)} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                block_level = tidyup.domain(block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for block in blocklist:
                    announced = _store_block(blocker, software, block_level, block, nodeinfo_url, command)
                    if announced is not None:
                        blockdict.append(announced)

            fba.connection.commit()
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)


def _find_obscured_instance(blocked: str):
    """Return the first ``instances`` row matching an obscured domain, or None.

    ``*`` and ``?`` placeholders are turned into the SQL ``LIKE`` single
    character wildcard ``_``. The row is ``(domain, origin, nodeinfo_url)``.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_").replace("?", "_")]
    )

    return fba.cursor.fetchone()


def _store_block(blocker: str, software: str, block_level: str, block: dict, nodeinfo_url, command: str):
    """Record a single block entry of ``blocker``.

    Returns a ``{"blocked": ..., "reason": ...}`` dict when a new block with
    level "reject" was added (for the bot post), otherwise None.

    ``nodeinfo_url`` is a plain local here, so replacing it for one entry
    cannot affect the entries processed after it.
    """
    # Relies on the value order of the dict: blocked domain first, then reason
    blocked, reason = block.values()
    blocked = tidyup.domain(blocked)
    reason = tidyup.reason(reason) if reason is not None and reason != "" else None

    if blocked == "":
        print("WARNING: blocked is empty:", blocker)
        return None
    elif blacklist.is_blacklisted(blocked):
        return None
    elif blocked.count("*") > 0 or blocked.count("?") > 0:
        # Some servers obscure domains with asterisks or question marks
        searchres = _find_obscured_instance(blocked)

        if searchres is None:
            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
            return None

        blocked = searchres[0]
        nodeinfo_url = searchres[2]

    if not validators.domain(blocked):
        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
        return None
    elif not instances.is_registered(blocked):
        instances.add(blocked, blocker, command, nodeinfo_url)

    if not blocks.is_instance_blocked(blocker, blocked, block_level):
        blocks.add_instance(blocker, blocked, reason, block_level)

        if block_level == "reject":
            return {
                "blocked": blocked,
                "reason" : reason
            }
    else:
        blocks.update_last_seen(blocker, blocked, block_level)
        blocks.update_reason(reason, blocker, blocked, block_level)

    return None
265
def fetch_cs(args: argparse.Namespace):
    """Import chaos.social's published federation list.

    Downloads ``federation.md`` from the chaossocial/meta repository, renders
    the markdown to HTML and extracts the domains from the tables following
    the "silenced-instances" and "blocked-instances" headings. Each domain is
    recorded as blocked by ``chaos.social`` with level "silenced" or "reject"
    respectively; domains not yet registered are additionally passed to
    ``federation.fetch_instances()``.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    extensions = [
        'extra',
        'abbr',
        'attr_list',
        'def_list',
        'fenced_code',
        'footnotes',
        'md_in_html',
        'admonition',
        'codehilite',
        'legacy_attrs',
        'legacy_em',
        'meta',
        'nl2br',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks'
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    domains["reject"] = domains["reject"] + federation.find_domains(blocked)

    # Count the rows, not the dict keys: len(domains) is always 2
    total = sum(len(rows) for rows in domains.values())
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            for row in domains[block_level]:
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    try:
                        print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
                        # A single unreachable instance must not abort the import
                        print(f"WARNING: Timeout during fetching instances from domain='{row['domain']}'")
                        instances.update_last_error(row["domain"], ex)

        fba.connection.commit()
333
def fetch_fba_rss(args: argparse.Namespace):
    """Discover new instances from an FBA-specific RSS feed.

    Fetches ``args.feed`` and takes the domain of each item from the text
    between the first and second ``=`` of the item's link. Domains that are
    blacklisted, already queued or already registered are skipped; the rest
    are passed to ``federation.fetch_instances()``.

    Items whose link is missing or carries no ``=`` are skipped with a
    warning instead of aborting the whole run.
    """
    domains = list()

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        rss = atoma.parse_rss_bytes(response.content)

        for item in rss.items:
            parts = item.link.split("=") if item.link else []
            if len(parts) < 2 or parts[1] == "":
                print(f"WARNING: item.link='{item.link}' does not contain a domain - SKIPPED!")
                continue

            domain = parts[1]

            if blacklist.is_blacklisted(domain):
                continue
            elif domain in domains:
                continue
            elif instances.is_registered(domain):
                continue

            domains.append(domain)

    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
                # A single unreachable instance must not abort the whole run
                print(f"WARNING: Timeout during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, ex)
378
def fetch_fbabot_atom(args: argparse.Namespace):
    """Discover new instances from the FBA bot account's ATOM feed.

    Every ``href`` of every link in each feed entry is split on commas and
    each part is cleaned with ``tidyup.domain()``. Results that are empty,
    blacklisted, already queued or already registered are skipped; the rest
    are passed to ``federation.fetch_instances()``.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        atom = atoma.parse_atom_bytes(response.content)

        for entry in atom.entries:
            if entry.content is None:
                # Entry without a content element, nothing to scan
                continue

            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")

            # href=True restricts the search to anchors that actually carry
            # an href attribute, so the subscript below cannot fail
            for element in doc.findAll("a", href=True):
                for href in element["href"].split(","):
                    domain = tidyup.domain(href)

                    if not domain:
                        # Nothing left after tidying up
                        continue
                    elif blacklist.is_blacklisted(domain):
                        continue
                    elif domain in domains:
                        continue
                    elif instances.is_registered(domain):
                        continue

                    domains.append(domain)

    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
                # A single unreachable instance must not abort the whole run
                print(f"WARNING: Timeout during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, ex)
431
def fetch_instances(args: argparse.Namespace):
    """Fetch instances starting at ``args.domain``, then re-check known ones.

    The start domain is always fetched first; if that times out or the
    connection fails, the error is recorded and the command stops. With
    ``args.single`` set the command stops after the start domain. Otherwise
    all instances of the selected software types whose
    ``last_instance_fetch`` is NULL or older than the ``recheck_instance``
    configuration value are fetched as well, skipping blacklisted domains.
    """
    network_errors = (requests.exceptions.Timeout, requests.exceptions.ConnectionError)
    # Name of this command, handed to federation.fetch_instances()
    command = inspect.currentframe().f_code.co_name

    locking.acquire()

    # Initial fetch
    print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
    try:
        federation.fetch_instances(args.domain, None, None, command)
    except network_errors as ex:
        print(f"WARNING: Timeout during fetching instances from args.domain='{args.domain}'")
        instances.update_last_error(args.domain, ex)
        return

    if args.single:
        return

    def fetch_row(row):
        # Fetch one known instance; a failure is recorded, not propagated
        print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
        try:
            federation.fetch_instances(row[0], row[1], row[2], command, row[3])
        except network_errors as ex:
            print(f"WARNING: Timeout during fetching instances from domain='{row[0]}'")
            instances.update_last_error(row[0], ex)

    # Loop through some instances
    oldest_allowed = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [oldest_allowed]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for row in rows:
        if blacklist.is_blacklisted(row[0]):
            print("WARNING: domain is blacklisted:", row[0])
        else:
            fetch_row(row)
470
def fetch_federater(args: argparse.Namespace):
    """Discover new instances from the federater recommended-blocks CSV.

    Downloads ``federater.csv`` from the federater/blocks_recommended
    repository and reads the ``#domain`` column of every row. Domains that
    are invalid, blacklisted or already registered are skipped; the rest are
    passed to ``federation.fetch_instances()``.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    locking.acquire()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # response.content is bytes, so it must be tested by length; comparing it
    # with the str "" is always unequal
    if response.ok and len(response.content) > 0:
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')

        for row in reader:
            domain = row["#domain"]

            if not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                continue

            try:
                print(f"INFO: Fetching instances for instane='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as ex:
                # A single unreachable instance must not abort the whole run
                print(f"WARNING: Timeout during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, ex)