]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Continued:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import time
21
22 import argparse
23 import atoma
24 import bs4
25 import markdown
26 import reqto
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import config
32 from fba import federation
33 from fba import fba
34 from fba import instances
35 from fba import locking
36 from fba import network
37
38 from fba.helpers import tidyup
39
40 from fba.networks import friendica
41 from fba.networks import mastodon
42 from fba.networks import misskey
43 from fba.networks import pleroma
44
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain could be added as a new instance.

    The checks run in a fixed order and the first one that matches decides
    the result; a message describing the outcome is always printed.

    Returns:
        100 if validators.domain() rejects the domain, 101 if it is
        blacklisted, 102 if it is already registered, otherwise 0.
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    # None of the above matched, so the domain is new to us
    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
62
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the domain list from gql.api.bka.li and crawl all new domains.

    A GraphQL query is POSTed to gql.api.bka.li; every returned domain that
    is valid, not blacklisted and not yet registered is collected and then
    handed to federation.fetch_instances().

    Returns:
        0 on success, 100 if post_json_api() returned an "error_message",
        101 if the JSON reply carried an error object, 102 if one of
        network.exceptions was raised while fetching the list.

    Raises:
        Exception: if the reply is empty or lacks the "data" or "nodeinfo"
            key (propagates unless network.exceptions covers it).
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
        if "error_message" in fetched:
            print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
            return 100

        # The error object lives inside the decoded JSON body, not on the
        # top-level result, so it has to be read from fetched["json"].
        error = fetched["json"].get("error") if isinstance(fetched["json"], dict) else None
        if isinstance(error, dict) and "message" in error:
            print(f"WARNING: post_json_api() returned error: {error['message']}")
            return 101

        rows = fetched["json"]

        # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except network.exceptions as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
        return 102

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # One failing domain must not stop the remaining ones
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
    return 0
126
def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and store them.

    When args.domain is set only that domain is re-checked (it must be
    valid, not blacklisted and already registered). Otherwise all instances
    of the supported software whose last_blocked is NULL or older than the
    configured "recheck_block" interval are processed.

    Pleroma and Mastodon are delegated to their network modules. Friendica
    and Misskey block lists are processed here: each blocked domain is
    tidied, de-obfuscated via the instances table if it contains "*" or
    "?", registered if unknown, and then added as a block or refreshed.
    New "reject" blocks are collected and, if "bot_enabled" is configured,
    passed to network.send_bot_post().
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        blockdict = list()
        blocker = tidyup.domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software in ("friendica", "misskey"):
            print(f"INFO: blocker='{blocker}',software='{software}'")

            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            else:
                blocking = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                block_level = tidyup.domain(block_level)
                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                for block in blocklist:
                    # NOTE(review): relies on each block dict holding exactly
                    # two values in the order (blocked, reason) - confirm
                    # against the friendica/misskey fetchers.
                    blocked, reason = block.values()
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                        continue

                    # Some servers obscure domains with "*", others with "?".
                    # "*" takes precedence when both occur.
                    if "*" in blocked:
                        wildcard = "*"
                    elif "?" in blocked:
                        wildcard = "?"
                    else:
                        wildcard = None

                    if wildcard is not None:
                        # Each obscured character becomes a single-character
                        # LIKE wildcard ("_")
                        fba.cursor.execute(
                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
                        )

                        searchres = fba.cursor.fetchone()

                        # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'")
                        if searchres is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                            continue

                        # Only the domain is taken over. The blocker's own
                        # origin/nodeinfo_url must stay untouched, otherwise
                        # they leak into later entries of this block list.
                        blocked = searchres[0]

                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                    if not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                        continue
                    elif not instances.is_registered(blocked):
                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                        instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        # Only newly seen "reject" blocks are announced
                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

            # DEBUG: print("DEBUG: Committing changes ...")
            fba.connection.commit()
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
276
def fetch_cs(args: argparse.Namespace):
    """Import chaos.social's published federation list as blocks.

    Downloads federation.md from the chaossocial/meta repository, renders
    the Markdown to HTML and extracts the tables following the
    "silenced-instances" and "blocked-instances" headings. Every row is
    stored as a block by 'chaos.social' (levels "silenced" / "reject") if
    not already present, and unregistered domains are crawled with
    federation.fetch_instances().
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    extensions = [
        'extra',
        'abbr',
        'attr_list',
        'def_list',
        'fenced_code',
        'footnotes',
        'md_in_html',
        'admonition',
        'codehilite',
        'legacy_attrs',
        'legacy_em',
        'meta',
        'nl2br',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks'
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'")

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'")
    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: silenced[]='{type(silenced)}'")
    domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: blocked[]='{type(blocked)}'")
    domains["reject"] = domains["reject"] + federation.find_domains(blocked)

    # The dict always has exactly two keys, so count the rows it holds
    # rather than the dict itself.
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level, rows in domains.items():
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in rows:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    try:
                        print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        # One failing domain must not stop the remaining ones
                        print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{row['domain']}'")
                        instances.update_last_error(row["domain"], exception)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
344
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl new domains announced in an FBA-specific RSS feed.

    Fetches args.feed, takes the text after the first "=" of each item's
    link as the domain, and runs federation.fetch_instances() for every
    domain that is not blacklisted, not a duplicate within the feed and
    not yet registered. Items without a usable link are skipped.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    domains = list()

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
        rss = atoma.parse_rss_bytes(response.content)

        # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'")
        for item in rss.items:
            # DEBUG: print(f"DEBUG: item={item}")
            # A feed item may lack a link, or the link may carry no "=";
            # skip those instead of aborting the whole run.
            parts = item.link.split("=") if item.link else []
            if len(parts) < 2 or parts[1] == "":
                print(f"WARNING: item.link='{item.link}' does not contain a domain - SKIPPED!")
                continue

            domain = parts[1]

            if blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif domain in domains:
                # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
            domains.append(domain)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # One failing domain must not stop the remaining ones
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
389
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl new domains mentioned in the FBA bot account's ATOM feed.

    Fetches the feed of the bot account on ryona.agency, parses the HTML
    content of every entry and treats each comma-separated part of every
    anchor's href as a domain candidate. Candidates that are empty after
    tidyup.domain(), blacklisted, duplicates or already registered are
    skipped; the rest is passed to federation.fetch_instances().
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
        atom = atoma.parse_atom_bytes(response.content)

        # DEBUG: print(f"DEBUG: atom[]='{type(atom)}'")
        for entry in atom.entries:
            # DEBUG: print(f"DEBUG: entry[]='{type(entry)}'")
            if entry.content is None:
                # Entry without a content element - nothing to parse
                continue

            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
            for element in doc.findAll("a"):
                hrefs = element.get("href")
                if hrefs is None:
                    # Anchor without href attribute (e.g. a named anchor)
                    continue

                for href in hrefs.split(","):
                    # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                    domain = tidyup.domain(href)

                    # DEBUG: print(f"DEBUG: domain='{domain}'")
                    if domain == "":
                        # Nothing left after tidying up, nothing to crawl
                        continue
                    elif blacklist.is_blacklisted(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                        continue
                    elif domain in domains:
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                        continue
                    elif instances.is_registered(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                        continue

                    # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                    domains.append(domain)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # One failing domain must not stop the remaining ones
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
442
def fetch_instances(args: argparse.Namespace):
    """Crawl args.domain and, unless args.single is set, all due instances.

    The given domain is fetched first; if that raises one of
    network.exceptions the error is recorded and the function returns.
    Afterwards every instance of the supported software whose
    last_instance_fetch is NULL or older than the configured
    "recheck_instance" interval is fetched as well, skipping blacklisted
    domains.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Initial fetch
    try:
        print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during fetching instances from args.domain='{args.domain}'")
        instances.update_last_error(args.domain, exception)
        return

    if args.single:
        # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
        return

    # Select all instances that are due for a re-fetch
    query = "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC"
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(query, [threshold])

    due = fba.cursor.fetchall()
    print(f"INFO: Checking {len(due)} entries ...")
    for domain, origin, software, nodeinfo_url in due:
        # DEBUG: print(f"DEBUG: domain='{domain}'")
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        try:
            print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
            federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name, nodeinfo_url)
        except network.exceptions as exception:
            # Record the failure and carry on with the next instance
            print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
            instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")
481
def fetch_federater(args: argparse.Namespace):
    """Crawl the domains listed in federater's recommended block CSV.

    Downloads federater.csv from GitHub, reads the "#domain" column of each
    row and runs federation.fetch_instances() for every domain that is
    valid, not blacklisted and not yet registered.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
    # response.content is bytes, so comparing it against "" is always
    # unequal; test the length instead.
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        ## DEBUG: print(f"DEBUG: response.content={response.content}")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
        # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'")
        for row in reader:
            domain = row.get("#domain")

            if domain is None:
                # Header changed or short row - there is no domain to check
                print(f"WARNING: row()={len(row)} does not contain '#domain' - skipped!")
                continue
            elif not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - skipped!")
                continue

            try:
                print(f"INFO: Fetching instances for instane='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                # One failing domain must not stop the remaining ones
                print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{domain}'")
                instances.update_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")