]> git.mxchange.org Git - fba.git/blob - fba/commands.py
56bc9ff0d9417f8d876488adb6373464b8635590
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import argparse
18 import atoma
19 import bs4
20 import csv
21 import inspect
22 import itertools
23 import json
24 import re
25 import reqto
26 import sys
27 import time
28 import validators
29
30 from fba import blacklist
31 from fba import blocks
32 from fba import boot
33 from fba import config
34 from fba import fba
35 from fba import instances
36
37 from fba.federation import *
38
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain is eligible to be added as a new instance.

    Prints one line describing the outcome and returns a status code:
    0 when the domain is not known yet, 100 when it is not a valid domain
    name, 101 when it is blacklisted and 102 when it is already registered.
    The checks run in exactly that order and the first match wins.
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    # None of the rejecting conditions matched
    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
56
def fetch_bkali(args: argparse.Namespace):
    """Fetch the domain list from gql.api.bka.li and crawl unknown domains.

    Posts a GraphQL query to gql.api.bka.li, collects every returned domain
    that is valid, not blacklisted and not yet registered, and then calls
    fba.fetch_instances() for each of them after acquiring the lock.

    Any error while fetching or filtering is reported and terminates the
    process with exit status 255. args is accepted for the command
    interface but not read here.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()
    try:
        fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        elif "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print("WARNING: entry does not contain 'domain' - SKIPPED!")
                continue

            domain = entry["domain"]
            if not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
            domains.append(domain)

    except Exception as e:
        # Exception (not BaseException) so that Ctrl-C and sys.exit() raised
        # further down the call chain are not reported as fetch errors.
        print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
105
def _deobfuscate_domain(blocked: str, wildcard: str):
    """Look up the first registered instance matching an obscured domain.

    Every occurrence of wildcard in blocked is replaced by the SQL LIKE
    single-character placeholder "_". Returns the first matching row by
    rowid as (domain, origin, nodeinfo_url), or None when nothing matches.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )
    return fba.cursor.fetchone()

def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and record them.

    When args.domain is set, only that instance is re-checked; it must be a
    valid, non-blacklisted and already registered domain, otherwise a
    warning is printed and nothing else happens. Without a domain, every
    instance running a supported software whose last_blocked is NULL or
    older than the "recheck_block" configuration value is processed.

    pleroma and mastodon instances are delegated to their federation
    modules. friendica and misskey block lists are handled here: obscured
    domains are resolved against the instances table, unknown blocked
    domains get registered, and each block is either added or has its
    last-seen time and reason refreshed. Newly added "reject" blocks are
    collected and handed to send_bot_post() when "bot_enabled" is set.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Newly added "reject" blocks of this blocker, for the bot post
        blockdict = list()
        blocker = fba.tidyup_domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                # Named "records" rather than "json" so the imported json
                # module is not shadowed inside this function.
                if software == "friendica":
                    records = fba.fetch_friendica_blocks(blocker)
                else:
                    records = misskey.fetch_blocks(blocker)

                print(f"INFO: Checking {len(records.items())} entries from blocker='{blocker}',software='{software}' ...")
                for block_level, blocklist in records.items():
                    # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                    block_level = fba.tidyup_domain(block_level)
                    # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                    if block_level == "":
                        print("WARNING: block_level is empty, blocker:", blocker)
                        continue

                    # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                    for block in blocklist:
                        # NOTE(review): relies on every block dict holding exactly two
                        # values in (domain, reason) order - confirm against the fetchers.
                        blocked, reason = block.values()
                        # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                        blocked = fba.tidyup_domain(blocked)
                        reason  = fba.tidyup_reason(reason) if reason is not None and reason != "" else None
                        # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                        if blocked == "":
                            print("WARNING: blocked is empty:", blocker)
                            continue
                        elif blacklist.is_blacklisted(blocked):
                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                            continue

                        # Start from the blocker's row value for every block. A lookup
                        # result for one obscured domain must not leak into the blocks
                        # that follow it.
                        # NOTE(review): the blocker's nodeinfo_url is what gets stored for
                        # a newly registered blocked instance - confirm this is intended.
                        blocked_nodeinfo_url = nodeinfo_url

                        # Some friendica servers obscure domains with "*", others with "?".
                        # As before, "*" takes precedence and only that one character is
                        # translated into the LIKE placeholder.
                        if "*" in blocked:
                            wildcard = "*"
                        elif "?" in blocked:
                            wildcard = "?"
                        else:
                            wildcard = None

                        if wildcard is not None:
                            searchres = _deobfuscate_domain(blocked, wildcard)

                            if searchres is None:
                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                                continue

                            blocked = searchres[0]
                            blocked_nodeinfo_url = searchres[2]

                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        if not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue
                        elif not instances.is_registered(blocked):
                            # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                        if not blocks.is_instance_blocked(blocker, blocked, block_level):
                            blocks.add_instance(blocker, blocked, reason, block_level)

                            if block_level == "reject":
                                blockdict.append({
                                    "blocked": blocked,
                                    "reason" : reason
                                })
                        else:
                            # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                            blocks.update_last_seen(blocker, blocked, block_level)
                            blocks.update_reason(reason, blocker, blocked, block_level)

                # DEBUG: print("DEBUG: Committing changes ...")
                fba.connection.commit()
            except Exception as e:
                # Best effort: a failing blocker is reported and the next row is processed
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            # NOTE(review): send_bot_post is not defined in the visible part of this
            # file; presumably provided by the wildcard import - verify it resolves.
            send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
256
def fetch_cs(args: argparse.Namespace):
    """Import the silenced and blocked instance tables of chaos.social.

    Downloads meta.chaos.social/federation, extracts the tables following
    the "silenced-instances" and "blocked-instances" headings through
    fba.find_domains() and stores each row as a block by chaos.social with
    block level "silenced" or "reject". Domains that are not registered
    yet are crawled with fba.fetch_instances().

    Any error while fetching or parsing the page is reported and
    terminates the process with exit status 255. args is accepted for the
    command interface but not read here.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    blocker = "chaos.social"
    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    try:
        doc = bs4.BeautifulSoup(
            fba.get_response("meta.chaos.social", "/federation", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
        domains["silenced"] = domains["silenced"] + fba.find_domains(silenced)
        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
        domains["reject"] = domains["reject"] + fba.find_domains(blocked)

    except Exception as e:
        # Exception (not BaseException) so that Ctrl-C and sys.exit() are
        # not reported as fetch errors.
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # Count the fetched rows. len(domains) would only count the two
    # block-level keys and therefore always be 2.
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        for block_level, rows in domains.items():
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in rows:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked(blocker, row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance(blocker, row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
305
def fetch_fba_rss(args: argparse.Namespace):
    """Fetch an FBA-specific RSS feed and crawl the domains it mentions.

    Downloads args.feed, takes the text after the first "=" of every item
    link as the domain, and collects those that are not blacklisted, not
    already collected and not yet registered. Each collected domain is
    then passed to fba.fetch_instances() after acquiring the lock.

    Any error while fetching or parsing the feed is reported and
    terminates the process with exit status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    # Bound once up front so the error handler below can name the feed too
    feed = args.feed
    domains = list()

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{feed}' ...")
        response = fba.fetch_url(feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
            rss = atoma.parse_rss_bytes(response.content)

            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
            for item in rss.items:
                # DEBUG: print(f"DEBUG: item={item}")
                domain = item.link.split("=")[1]

                if blacklist.is_blacklisted(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                    continue
                elif domain in domains:
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                    continue
                elif instances.is_registered(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                    continue

                # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
                domains.append(domain)

    except Exception as e:
        # Exception (not BaseException) so that Ctrl-C and sys.exit() are
        # not reported as fetch errors.
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
351
def fetch_fbabot_atom(args: argparse.Namespace):
    """Fetch the FBA bot's ATOM feed and crawl the domains linked in it.

    Downloads the bot account's feed, parses the HTML content of every
    entry and treats each comma-separated part of every anchor's href as a
    domain candidate (after fba.tidyup_domain()). Candidates that are not
    blacklisted, not already collected and not yet registered are passed
    to fba.fetch_instances() after acquiring the lock.

    Any error while fetching or parsing the feed is reported and
    terminates the process with exit status 255. args is accepted for the
    command interface but not read here.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()
    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = fba.fetch_url(feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing ATOM feed ...")
            atom = atoma.parse_atom_bytes(response.content)

            # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
            for entry in atom.entries:
                # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
                doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
                # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
                for element in doc.findAll("a"):
                    for href in element["href"].split(","):
                        # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                        domain = fba.tidyup_domain(href)

                        # DEBUG: print(f"DEBUG: domain='{domain}'")
                        if blacklist.is_blacklisted(domain):
                            # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                            continue
                        elif domain in domains:
                            # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                            continue
                        elif instances.is_registered(domain):
                            # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                            continue

                        # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                        domains.append(domain)

    except Exception as e:
        # Exception (not BaseException) so that Ctrl-C and sys.exit() are
        # not reported as fetch errors.
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
404
def fetch_instances(args: argparse.Namespace):
    """Crawl args.domain and, unless args.single is set, stale instances.

    The lock is acquired first, then args.domain is fetched. With
    args.single the command stops there. Otherwise every instance of a
    supported software whose last_instance_fetch is NULL or older than the
    "recheck_instance" configuration value is fetched as well, newest
    rowid first, skipping blacklisted domains.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    command = inspect.currentframe().f_code.co_name

    boot.acquire_lock()

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        # DEBUG: print(f"DEBUG: Not fetching more instances - EXIT!")
        return

    # Everything fetched before this point in time is due for a re-check
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: domain:", domain)
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")
433
def fetch_federater(args: argparse.Namespace):
    """Crawl the domains listed in the federater recommended-blocks CSV.

    Acquires the lock, downloads federater.csv from GitHub and parses it
    with csv.DictReader. Every "#domain" value that is a valid domain, not
    blacklisted and not yet registered is passed to fba.fetch_instances().
    args is accepted for the command interface but not read here.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
    # response.content is bytes (it is decoded below), so comparing it with
    # the str "" was always true; test the length instead.
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
        for row in reader:
            domain = row["#domain"]

            if not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - skipped!")
                continue

            print(f"INFO: Fetching instances for instane='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
461     # DEBUG: print("DEBUG: EXIT!")