]> git.mxchange.org Git - fba.git/blob - fba/commands.py
514da073b71654fc65db8d31b51cb572e0b9c6ff
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import argparse
18 import atoma
19 import bs4
20 import inspect
21 import itertools
22 import json
23 import re
24 import reqto
25 import sys
26 import time
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import boot
32 from fba import config
33 from fba import fba
34 from fba import instances
35
36 from fba.federation import *
37
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain could be added as a new instance.

    Exactly one line describing the outcome is printed.

    Returns:
        100 if validators.domain() rejects the domain, 101 if the domain is
        blacklisted, 102 if it is already registered, otherwise 0.
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")

    # The checks run in a fixed order; the first one that matches decides
    # the status code.
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if fba.is_instance_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
55
def fetch_bkali(args: argparse.Namespace):
    """Discover new instances through the gql.api.bka.li GraphQL API.

    Posts a "domainlist" query and keeps every returned domain that is
    valid, not blacklisted and not yet registered. For each remaining domain
    fba.fetch_instances() is called after boot.acquire_lock() was invoked.

    Any exception raised while querying or filtering is printed and ends the
    process with exit code 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = []

    try:
        payload = json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        })
        fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", payload)

        # Refuse to continue on an empty or unexpectedly shaped response
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        if "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        if "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            if "domain" not in entry:
                print(f"WARNING: entry does not contain 'domain' - SKIPPED!")
                continue

            if not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue

            # Blacklisted and already registered domains are skipped silently
            if blacklist.is_blacklisted(entry["domain"]) or fba.is_instance_registered(entry["domain"]):
                continue

            domains.append(entry["domain"])

    except BaseException as error:
        print(f"ERROR: Cannot fetch graphql,exception[{type(error)}]:'{str(error)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if domains:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")

        # Name of this command, handed to fba.fetch_instances() with each domain
        command = inspect.currentframe().f_code.co_name
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, command)

    # DEBUG: print("DEBUG: EXIT!")
104
def _find_obfuscated_instance(blocked: str, wildcard: str):
    """Return the first known instance row matching an obfuscated domain.

    Every occurrence of `wildcard` in `blocked` is replaced by "_" (the SQL
    LIKE placeholder for exactly one character) before the lookup.

    Returns:
        The (domain, origin, nodeinfo_url) row with the lowest rowid, or None
        when no instance matches.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )
    return fba.cursor.fetchone()

def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and store them.

    With args.domain set only that instance is checked; it must be valid,
    not blacklisted and already registered, otherwise a warning is printed
    and nothing is done. Without it every instance whose last_blocked is
    NULL or older than the "recheck_block" config value is checked.

    pleroma, mastodon and gotosocial are delegated to their modules'
    fetch_blocks(). friendica and misskey block lists are processed here:
    obfuscated domains are resolved against the instances table, unknown
    blocked instances are added, and the block is added or refreshed. When
    the "bot_enabled" config value is set, newly added "reject" blocks are
    handed to send_bot_post().
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not fba.is_instance_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    # NOTE(review): 'bookwyrm' and 'takahe' are selected here but have no
    # branch below, so such rows only produce the "Unknown software" warning
    # - confirm whether that is intended.
    if single_domain:
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # Newly added "reject" blocks of this blocker, for the bot post
        blockdict = list()
        blocker = fba.tidyup_domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                # Named 'fetched' so the imported json module is not shadowed
                if software == "friendica":
                    fetched = fba.fetch_friendica_blocks(blocker)
                else:
                    fetched = fba.fetch_misskey_blocks(blocker)

                print(f"INFO: Checking {len(fetched)} entries from blocker='{blocker}',software='{software}' ...")
                for block_level, blocklist in fetched.items():
                    # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                    block_level = fba.tidyup_domain(block_level)
                    # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                    if block_level == "":
                        print("WARNING: block_level is empty, blocker:", blocker)
                        continue

                    # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                    for block in blocklist:
                        # Relies on each entry holding exactly two values in
                        # the order (blocked, reason)
                        blocked, reason = block.values()
                        # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                        blocked = fba.tidyup_domain(blocked)
                        # DEBUG: print("DEBUG: AFTER blocked:", blocked)

                        # Per-entry copy: a de-obfuscated entry must not
                        # change the URL used for the entries after it.
                        blocked_nodeinfo_url = nodeinfo_url

                        if blocked == "":
                            print("WARNING: blocked is empty:", blocker)
                            continue
                        elif blacklist.is_blacklisted(blocked):
                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                            continue

                        # Some servers obscure domains with "*", others with
                        # "?"; "*" takes precedence when both are present.
                        if "*" in blocked:
                            wildcard = "*"
                        elif "?" in blocked:
                            wildcard = "?"
                        else:
                            wildcard = None

                        if wildcard is not None:
                            searchres = _find_obfuscated_instance(blocked, wildcard)

                            if searchres is None:
                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                                continue

                            blocked = searchres[0]
                            blocked_nodeinfo_url = searchres[2]

                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        if not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue
                        elif not fba.is_instance_registered(blocked):
                            # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                        if not blocks.is_instance_blocked(blocker, blocked, block_level):
                            blocks.add_instance(blocker, blocked, reason, block_level)

                            if block_level == "reject":
                                blockdict.append({
                                    "blocked": blocked,
                                    "reason" : reason
                                })
                        else:
                            # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                            blocks.update_last_seen(blocker, blocked, block_level)
                            blocks.update_reason(reason, blocker, blocked, block_level)

                # DEBUG: print("DEBUG: Committing changes ...")
                fba.connection.commit()
            except Exception as e:
                # Best effort: a failing blocker must not abort the whole run
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        elif software == "gotosocial":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            gotosocial.fetch_blocks(blocker, origin, nodeinfo_url)
        else:
            print("WARNING: Unknown software:", blocker, software)

        # NOTE(review): send_bot_post is not defined in the visible part of
        # this file; presumably it comes from the wildcard import - confirm.
        if config.get("bot_enabled") and len(blockdict) > 0:
            send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
254
def fetch_cs(args: argparse.Namespace):
    """Import the block list published on meta.chaos.social/federation.

    The tables following the "silenced-instances" and "blocked-instances"
    headings are passed to fba.find_domains(). For every row found, the
    domain is fetched via fba.fetch_instances() if it is not registered yet,
    and a block by 'chaos.social' with the matching level is added unless it
    already exists.

    Any exception raised while fetching or parsing the page is printed and
    ends the process with exit code 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = {
        "silenced": list(),
        "blocked": list(),
    }

    try:
        doc = bs4.BeautifulSoup(
            fba.get_response("meta.chaos.social", "/federation", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
        domains["silenced"] = domains["silenced"] + fba.find_domains(silenced)
        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
        domains["blocked"] = domains["blocked"] + fba.find_domains(blocked)

    except BaseException as e:
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # Count the collected rows: len(domains) would only count the two block
    # levels and therefore always be 2.
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not fba.is_instance_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
303
def fetch_fba_rss(args: argparse.Namespace):
    """Discover new instances from the FBA-specific RSS feed at args.feed.

    The domain is taken from the part of each item's link after the first
    "=". Domains that are blacklisted, already collected or already
    registered are skipped; for the others fba.fetch_instances() is called
    after boot.acquire_lock() was invoked.

    Any exception raised while fetching or parsing the feed (including an
    item link without "=") is printed and ends the process with exit code
    255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.get_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
            rss = atoma.parse_rss_bytes(response.content)

            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
            for item in rss.items:
                # DEBUG: print(f"DEBUG: item={item}")
                domain = item.link.split("=")[1]

                if blacklist.is_blacklisted(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                    continue
                elif domain in domains:
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                    continue
                elif fba.is_instance_registered(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                    continue

                # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
                domains.append(domain)

    except BaseException as e:
        # The feed URL lives in args.feed; this function has no local 'feed'
        print(f"ERROR: Cannot fetch feed='{args.feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
349
def fetch_fbabot_atom(args: argparse.Namespace):
    """Discover new instances from the FBA bot account's ATOM feed.

    Every "a" element in each feed entry's content is inspected; its href is
    split on "," and each part is passed through fba.tidyup_domain(). Domains
    that are blacklisted, already collected or already registered are
    skipped; for the others fba.fetch_instances() is called after
    boot.acquire_lock() was invoked.

    Any exception raised while fetching or parsing the feed is printed and
    ends the process with exit code 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"
    domains = []

    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = fba.get_url(feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # Only a successful, non-empty response is parsed; anything else
        # leaves the list of entries empty.
        entries = []
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            entries = atoma.parse_atom_bytes(response.content).entries

        for entry in entries:
            anchors = bs4.BeautifulSoup(entry.content.value, "html.parser").findAll("a")

            for anchor in anchors:
                for href in anchor["href"].split(","):
                    domain = fba.tidyup_domain(href)

                    # Checked in this order: blacklisted, already collected,
                    # already registered
                    if blacklist.is_blacklisted(domain) or domain in domains or fba.is_instance_registered(domain):
                        continue

                    domains.append(domain)

    except BaseException as error:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(error)}]:'{str(error)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if domains:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")

        # Name of this command, handed to fba.fetch_instances() with each domain
        command = inspect.currentframe().f_code.co_name
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, command)

    # DEBUG: print("DEBUG: EXIT!")
402
def fetch_instances(args: argparse.Namespace):
    """Fetch instances starting at args.domain, then re-check known ones.

    After the initial fetch for args.domain the function returns when
    args.single is set. Otherwise every instance running one of the listed
    software types whose last_instance_fetch is NULL or older than the
    "recheck_instance" config value is fetched again, skipping blacklisted
    domains.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Name of this command, handed to every fba.fetch_instances() call
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        # DEBUG: print(f"DEBUG: Not fetching more instances - EXIT!")
        return

    # Instances last fetched before this timestamp are due for a re-check
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")