]> git.mxchange.org Git - fba.git/blob - fba/commands.py
53ad51838e89ec064fcf64a1df0e85402e34217e
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import argparse
18 import atoma
19 import bs4
20 import inspect
21 import itertools
22 import json
23 import re
24 import reqto
25 import sys
26 import time
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import boot
32 from fba import config
33 from fba import fba
34 from fba import instances
35
36 from fba.federation import *
37
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain could be added as a new instance.

    Prints one line describing the outcome and returns a status code:
    0 when the domain is not known yet, 100 when it is not a valid domain
    name, 101 when it is blacklisted and 102 when it is already registered.
    The checks run in exactly that order; the first one that matches wins.
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    # None of the rejecting checks matched
    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
55
def fetch_bkali(args: argparse.Namespace):
    """Fetch the domain list from gql.api.bka.li and crawl all new domains.

    Posts a GraphQL query to "/v1/graphql", collects every returned domain
    that is valid, not blacklisted, not yet registered and not already
    collected, then acquires the lock and calls fba.fetch_instances() for
    each of them. Any error while fetching or parsing the response is
    printed and terminates the process with exit code 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = []
    try:
        fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        elif "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print("WARNING: entry does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif entry["domain"] in domains:
                # Same de-duplication as the RSS/ATOM commands, avoids crawling a domain twice
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except Exception as e:
        # Only real errors end up here; KeyboardInterrupt/SystemExit are not
        # subclasses of Exception and propagate unchanged.
        print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
104
def _deobfuscate_domain(blocked: str, wildcard: str):
    """Look up the registered domain hidden behind an obscured one.

    Every occurrence of `wildcard` in `blocked` is replaced by "_", the SQL
    LIKE placeholder for exactly one character, and the instances table is
    searched with that pattern. Returns the domain of the first match
    (lowest rowid) or None when no instance matches.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )

    searchres = fba.cursor.fetchone()
    return None if searchres is None else searchres[0]

def _store_block_levels(blocker: str, software: str, nodeinfo_url, command: str, levels: dict, blockdict: list):
    """Store all blocks of one blocker, grouped by block level.

    `levels` maps a block level to a list of block records. New blocks are
    added, known ones get their last-seen timestamp and reason updated.
    Newly added blocks of level "reject" are appended to `blockdict`
    in-place, so entries collected before an exception stay available to
    the caller. `command` is handed to instances.add() for blocked domains
    that are not registered yet.
    """
    for block_level, blocklist in levels.items():
        # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
        block_level = fba.tidyup_domain(block_level)
        # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
        for block in blocklist:
            # NOTE(review): relies on each record holding exactly two values,
            # ordered as (blocked, reason) - confirm against the fetchers.
            blocked, reason = block.values()
            # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
            blocked = fba.tidyup_domain(blocked)
            reason  = fba.tidyup_reason(reason)
            # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue
            elif blacklist.is_blacklisted(blocked):
                # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                continue

            # Some servers obscure domains with "*", others with "?".
            # "*" takes precedence when both characters are present.
            wildcard = next((char for char in ("*", "?") if char in blocked), None)
            if wildcard is not None:
                resolved = _deobfuscate_domain(blocked, wildcard)

                if resolved is None:
                    print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                    continue

                blocked = resolved

            # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
            if not validators.domain(blocked):
                print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                continue
            elif not instances.is_registered(blocked):
                # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                # nodeinfo_url is always the blocker's value here; it is no
                # longer overwritten by an earlier de-obfuscation lookup.
                instances.add(blocked, blocker, command, nodeinfo_url)

            if not blocks.is_instance_blocked(blocker, blocked, block_level):
                blocks.add_instance(blocker, blocked, reason, block_level)

                if block_level == "reject":
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : reason
                    })
            else:
                # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                blocks.update_last_seen(blocker, blocked, block_level)
                blocks.update_reason(reason, blocker, blocked, block_level)

def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and store them.

    When args.domain is set, only that domain is re-checked; it must be
    valid, not blacklisted and already registered, otherwise a warning is
    printed and nothing happens. Without a domain, all instances of a
    supported software whose last_blocked is NULL or older than the
    configured "recheck_block" interval are processed. Pleroma and
    Mastodon are delegated to their federation modules, Friendica and
    Misskey block lists are stored here. If "bot_enabled" is set, newly
    found "reject" blocks are sent through send_bot_post().
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    # Resolved here so the helper still records "fetch_blocks" as the command
    command = inspect.currentframe().f_code.co_name

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        blockdict = []
        blocker = fba.tidyup_domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                # Named "levels" (not "json") to keep the imported json module usable
                if software == "friendica":
                    levels = fba.fetch_friendica_blocks(blocker)
                else:
                    levels = fba.fetch_misskey_blocks(blocker)

                print(f"INFO: Checking {len(levels.items())} entries from blocker='{blocker}',software='{software}' ...")
                _store_block_levels(blocker, software, nodeinfo_url, command, levels, blockdict)

                # DEBUG: print("DEBUG: Committing changes ...")
                fba.connection.commit()
            except Exception as e:
                # Best effort: report the failing blocker and go on with the next one
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            # NOTE(review): send_bot_post is not defined or explicitly imported
            # in the visible part of this file - presumably provided by the
            # star import from fba.federation; confirm.
            send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
255
def fetch_cs(args: argparse.Namespace):
    """Import the silenced and blocked instances published by chaos.social.

    Downloads "/federation" from meta.chaos.social, reads the tables that
    follow the "silenced-instances" and "blocked-instances" headlines and,
    for every listed domain, fetches its instances when it is not yet
    registered and records the block with blocker 'chaos.social'. Any error
    while fetching or parsing the page is printed and terminates the
    process with exit code 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = {
        "silenced": [],
        "blocked": [],
    }

    try:
        doc = bs4.BeautifulSoup(
            fba.get_response("meta.chaos.social", "/federation", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
        domains["silenced"] = domains["silenced"] + fba.find_domains(silenced)
        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
        domains["blocked"] = domains["blocked"] + fba.find_domains(blocked)

    except Exception as e:
        # Only real errors end up here; KeyboardInterrupt/SystemExit are not
        # subclasses of Exception and propagate unchanged.
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # Count the rows found; len(domains) would always be 2 (the two block
    # levels) regardless of how many domains were actually parsed.
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        for block_level, rows in domains.items():
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in rows:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not instances.is_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
304
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl all new domains listed in an FBA-specific RSS feed.

    Downloads the feed at args.feed and takes the text after the first "="
    of each item's link as the domain. Domains that are blacklisted, already
    collected or already registered are skipped; for the rest the lock is
    acquired and fba.fetch_instances() is called. Any error while fetching
    or parsing the feed is printed and terminates the process with exit
    code 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = []

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.get_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
            rss = atoma.parse_rss_bytes(response.content)

            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
            for item in rss.items:
                # DEBUG: print(f"DEBUG: item={item}")
                # A link without "=" raises IndexError and is handled below
                domain = item.link.split("=")[1]

                if blacklist.is_blacklisted(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                    continue
                elif domain in domains:
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                    continue
                elif instances.is_registered(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                    continue

                # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
                domains.append(domain)

    except Exception as e:
        # The feed URL lives in args.feed; there is no local named "feed" in
        # this function, referencing one here raised a NameError instead of
        # reporting the actual problem.
        print(f"ERROR: Cannot fetch feed='{args.feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
350
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl all new domains linked in the FBA bot account's ATOM feed.

    Downloads the feed of the bot account, parses every entry's content as
    HTML and treats each comma-separated part of every anchor's "href" as a
    domain after running it through fba.tidyup_domain(). Domains that are
    empty, blacklisted, already collected or already registered are skipped;
    for the rest the lock is acquired and fba.fetch_instances() is called.
    Any error while fetching or parsing the feed is printed and terminates
    the process with exit code 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = []
    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = fba.get_url(feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing ATOM feed ...")
            atom = atoma.parse_atom_bytes(response.content)

            # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
            for entry in atom.entries:
                # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
                doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
                # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
                for element in doc.findAll("a"):
                    for href in element["href"].split(","):
                        # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                        domain = fba.tidyup_domain(href)

                        # DEBUG: print(f"DEBUG: domain='{domain}'")
                        if domain == "":
                            # Tidying up can leave nothing behind (e.g. a trailing comma)
                            # DEBUG: print("DEBUG: domain is empty - SKIPPED!")
                            continue
                        elif blacklist.is_blacklisted(domain):
                            # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                            continue
                        elif domain in domains:
                            # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                            continue
                        elif instances.is_registered(domain):
                            # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                            continue

                        # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                        domains.append(domain)

    except Exception as e:
        # Only real errors end up here; KeyboardInterrupt/SystemExit are not
        # subclasses of Exception and propagate unchanged.
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
403
def fetch_instances(args: argparse.Namespace):
    """Fetch instances starting at args.domain, then re-crawl stale ones.

    Acquires the lock and crawls args.domain first. Unless args.single is
    set, every instance of a supported software whose last_instance_fetch
    is NULL or older than the configured "recheck_instance" interval is
    crawled afterwards, newest rowid first; blacklisted ones are skipped.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Name of this command, handed to every fba.fetch_instances() call
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        # DEBUG: print(f"DEBUG: Not fetching more instances - EXIT!")
        return

    # Loop through some instances
    oldest_allowed = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [oldest_allowed]
    )

    candidates = fba.cursor.fetchall()
    print(f"INFO: Checking {len(candidates)} entries ...")
    for domain, origin, software, nodeinfo_url in candidates:
        # DEBUG: print("DEBUG: domain:", domain)
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")
431     # DEBUG: print("DEBUG: EXIT!")