]> git.mxchange.org Git - fba.git/blob - fba/commands.py
3f5b4354eb7e161e1330e8d23370e6cf3f7b6cd4
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import sys
21 import time
22
23 import argparse
24 import atoma
25 import bs4
26 import markdown
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import config
32 from fba import federation
33 from fba import fba
34 from fba import instances
35 from fba import locking
36 from fba import network
37
38 from fba.helpers import tidyup
39
40 from fba.networks import friendica
41 from fba.networks import mastodon
42 from fba.networks import misskey
43 from fba.networks import pleroma
44
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain could be added as a new instance.

    Prints exactly one line describing the outcome and returns a status code:

    * 0   - the domain is valid, not blacklisted and not registered yet
    * 100 - validators.domain() rejected the domain
    * 101 - the domain is blacklisted
    * 102 - the domain is already registered
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")

    # Guard clauses, checked in the same order as the status codes above.
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
62
def fetch_bkali(args: argparse.Namespace):
    """Fetch the domain list from the gql.api.bka.li GraphQL API and crawl new domains.

    The response must contain data -> nodeinfo, a list of entries each carrying
    a 'domain' key. Entries that are invalid, blacklisted, already registered
    or already queued are skipped. Every remaining domain is handed to
    federation.fetch_instances() after locking.acquire() has been called.

    Terminates the process with exit status 255 when the request fails or the
    response does not have the expected structure.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        elif "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif entry["domain"] in domains:
                # Same de-duplication the RSS/ATOM fetchers apply, so a domain
                # listed twice in the response is only crawled once.
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except Exception as exception:
        # Deliberately not BaseException: KeyboardInterrupt and SystemExit must
        # propagate instead of being reported as a failed GraphQL fetch.
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
111
def _deobfuscate_blocked(blocked: str, wildcard: str):
    """Look up an obfuscated domain in the instances table.

    Every occurrence of wildcard in blocked is replaced by "_" (the SQL LIKE
    placeholder for exactly one character) and the first matching row in
    rowid order is used. Returns the matching domain, or None when no row
    matches.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )

    row = fba.cursor.fetchone()
    return None if row is None else row[0]

def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and store them.

    When args.domain is set (not None and not empty) only that instance is
    checked; it must be a valid, non-blacklisted and already registered
    domain, otherwise a warning is printed and nothing is done. Without
    args.domain every instance of a supported software whose last_blocked is
    NULL or older than the 'recheck_block' configuration value is checked.

    pleroma and mastodon instances are delegated to their network modules.
    For friendica and misskey the fetched mapping of block level -> block
    entries is processed here: obfuscated domains ("*" or "?") are resolved
    against the instances table, unknown blocked domains are added, and each
    block is either added or has its last-seen marker and reason updated.
    Newly added "reject" blocks are passed to network.send_bot_post() when
    'bot_enabled' is set.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Newly added "reject" blocks of this blocker, reported to the bot below
        blockdict = list()
        blocker = tidyup.domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software in ("friendica", "misskey"):
            print(f"INFO: blocker='{blocker}',software='{software}'")

            # NOTE: This must not be named 'blocks', that would shadow the
            # imported 'blocks' module which is needed further down.
            if software == "friendica":
                block_levels = friendica.fetch_blocks(blocker)
            else:
                block_levels = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(block_levels)} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in block_levels.items():
                # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                block_level = tidyup.domain(block_level)
                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                for block in blocklist:
                    # Assumes each entry is a dict with exactly two values in
                    # the order (blocked domain, reason) - TODO confirm against
                    # the friendica/misskey network modules.
                    blocked, reason = block.values()
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                        continue
                    elif "*" in blocked or "?" in blocked:
                        # Some servers obscure domains with "*", others with
                        # "?". "*" takes precedence when both are present.
                        wildcard = "*" if "*" in blocked else "?"
                        resolved = _deobfuscate_blocked(blocked, wildcard)

                        if resolved is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                            continue

                        # Only the domain is taken over, origin and
                        # nodeinfo_url keep describing the blocker's row so
                        # they cannot leak into later entries.
                        blocked = resolved

                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                    if not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                        continue
                    elif not instances.is_registered(blocked):
                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                        instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

            # DEBUG: print("DEBUG: Committing changes ...")
            fba.connection.commit()
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
257
def _find_cs_domains(doc, heading_id: str) -> list:
    """Return the entries of the table that follows the <h2> with id heading_id.

    The table body is handed to federation.find_domains(). When the heading,
    the table or its <tbody> cannot be found, a warning is printed and an
    empty list is returned.
    """
    heading = doc.find("h2", {"id": heading_id})
    table = heading.find_next("table") if heading is not None else None
    tbody = table.find("tbody") if table is not None else None

    if tbody is None:
        print(f"WARNING: Cannot find table for heading_id='{heading_id}' - SKIPPED!")
        return list()

    return federation.find_domains(tbody)

def fetch_cs(args: argparse.Namespace):
    """Import the chaos.social federation list (silenced and blocked instances).

    Downloads federation.md from the chaossocial/meta repository, renders the
    markdown to HTML and reads the tables below the "silenced-instances" and
    "blocked-instances" headings. Each listed entry (a mapping with the keys
    'domain' and 'reason') is recorded as a block by 'chaos.social' with block
    level "silenced" or "reject" unless already present, and domains that are
    not registered yet are handed to federation.fetch_instances().
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    extensions = [
        'extra',
        'abbr',
        'attr_list',
        'def_list',
        'fenced_code',
        'footnotes',
        'md_in_html',
        'admonition',
        'codehilite',
        'legacy_attrs',
        'legacy_em',
        'meta',
        'nl2br',
        'sane_lists',
        'smarty',
        'toc',
        'wikilinks'
    ]

    response = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.text()={len(response.text)}")
    if not response.ok or len(response.text) == 0:
        # Don't try to parse an error page or an empty document
        print(f"WARNING: Cannot fetch federation.md, response.status_code='{response.status_code}' - EXIT!")
        return

    doc = bs4.BeautifulSoup(markdown.markdown(response.text, extensions=extensions), features='html.parser')
    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")

    domains = {
        "silenced": _find_cs_domains(doc, "silenced-instances"),
        "reject"  : _find_cs_domains(doc, "blocked-instances"),
    }

    # The dict always has two keys, so count the listed entries instead
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level, rows in domains.items():
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in rows:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
321
def fetch_fba_rss(args: argparse.Namespace):
    """Fetch an FBA-specific RSS feed (args.feed) and crawl the domains it lists.

    The domain of each item is the text between the first and the second "="
    of the item's link. Items without such a part are skipped with a warning;
    blacklisted, already queued and already registered domains are skipped
    silently. Every remaining domain is handed to federation.fetch_instances()
    after locking.acquire() has been called.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
        rss = atoma.parse_rss_bytes(response.content)

        # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
        for item in rss.items:
            # DEBUG: print(f"DEBUG: item={item}")
            # An item may have no link at all, or one without a "=" in it
            parts = (item.link or "").split("=")
            if len(parts) < 2 or parts[1] == "":
                print(f"WARNING: item.link='{item.link}' does not contain a domain - SKIPPED!")
                continue

            domain = parts[1]

            if blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif domain in domains:
                # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
            domains.append(domain)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
362
def fetch_fbabot_atom(args: argparse.Namespace):
    """Fetch the FBA bot account's ATOM feed and crawl the domains linked in it.

    Every entry's content is parsed as HTML; the href of each link is split
    on "," and each part is passed through tidyup.domain(). Empty results,
    blacklisted, already queued and already registered domains are skipped.
    Every remaining domain is handed to federation.fetch_instances() after
    locking.acquire() has been called.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
        atom = atoma.parse_atom_bytes(response.content)

        # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
        for entry in atom.entries:
            # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
            if entry.content is None or entry.content.value is None:
                # Entries without content have no links to look at
                # DEBUG: print("DEBUG: entry has no content - SKIPPED!")
                continue

            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            # DEBUG: print(f"DEBUG: doc[]={type(doc)}")

            # href=True leaves out anchors that carry no href attribute
            for element in doc.find_all("a", href=True):
                for href in element["href"].split(","):
                    # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                    domain = tidyup.domain(href)

                    # DEBUG: print(f"DEBUG: domain='{domain}'")
                    if domain == "":
                        # DEBUG: print(f"DEBUG: href='{href}' results in an empty domain - SKIPPED!")
                        continue
                    elif blacklist.is_blacklisted(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                        continue
                    elif domain in domains:
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                        continue
                    elif instances.is_registered(domain):
                        # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                        continue

                    # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                    domains.append(domain)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
411
def fetch_instances(args: argparse.Namespace):
    """Fetch instances starting at args.domain, then re-check known instances.

    After the initial fetch for args.domain, and unless args.single is set,
    every instance of a supported software whose last_instance_fetch is NULL
    or older than the 'recheck_instance' configuration value is fetched again.
    Blacklisted domains are skipped with a warning.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    locking.acquire()

    # Name of this command, passed along with every fetch
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    federation.fetch_instances(args.domain, None, None, command)

    if not args.single:
        # Loop through some instances
        min_fetched = time.time() - config.get("recheck_instance")
        fba.cursor.execute(
            "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [min_fetched]
        )

        found = fba.cursor.fetchall()
        print(f"INFO: Checking {len(found)} entries ...")
        for domain, origin, software, nodeinfo_url in found:
            # DEBUG: print(f"DEBUG: domain='{domain}'")
            if blacklist.is_blacklisted(domain):
                print("WARNING: domain is blacklisted:", domain)
            else:
                print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
                federation.fetch_instances(domain, origin, software, command, nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")
440
def fetch_federater(args: argparse.Namespace):
    """Fetch the federater recommended-blocks CSV and crawl the domains it lists.

    The CSV is expected to have a '#domain' column. Rows whose domain is
    missing, invalid or blacklisted are skipped with a warning, already
    registered domains are skipped silently. Every remaining domain is handed
    to federation.fetch_instances().
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    locking.acquire()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")

    # response.content is bytes, comparing it with "" would always be unequal
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        ## DEBUG: print(f"DEBUG: response.content={response.content}")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
        for row in reader:
            # None when the column is absent from the header or the row
            domain = row.get("#domain")

            if domain is None or not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - skipped!")
                continue

            print(f"INFO: Fetching instances for instane='{domain}' ...")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")