]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Continued:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import argparse
18 import atoma
19 import bs4
20 import inspect
21 import itertools
22 import json
23 import re
24 import reqto
25 import sys
26 import time
27 import validators
28
29 from fba import blacklist
30 from fba import blocks
31 from fba import boot
32 from fba import config
33 from fba import fba
34 from fba import instances
35
36 from fba.federation import *
37
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain could be added as a new instance.

    The checks run in a fixed order and the first one that matches decides
    the outcome; exactly one line is printed per call.

    Returns:
        0   - the domain is not known yet
        100 - the domain is not a valid domain name
        101 - the domain is blacklisted
        102 - the domain is already registered
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    # None of the rejecting conditions matched
    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
55
def fetch_bkali(args: argparse.Namespace):
    """Fetch the domain list from gql.api.bka.li and crawl every new domain.

    Posts a GraphQL query to gql.api.bka.li and collects each returned domain
    that is a valid domain name, is not blacklisted, has not been collected
    already and is not yet registered. When at least one domain remains, the
    lock is acquired and fba.fetch_instances() is called once per domain.

    Any error raised while fetching or checking the response is printed and
    the process exits with status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()
    try:
        fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        elif "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print("WARNING: entry does not contain 'domain' - SKIPPED!")
                continue

            domain = entry["domain"]
            if not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif domain in domains:
                # Same guard as the feed-based commands: never crawl a domain twice per run
                # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
            domains.append(domain)

    except Exception as e:
        # Exception (not BaseException) so that Ctrl-C and SystemExit are not
        # reported as a failed fetch.
        print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
104
def _find_obscured_domain(blocked: str, wildcard: str):
    """Return the first registered domain matching an obscured name, or None.

    Every occurrence of wildcard in blocked is replaced with '_' (the SQL LIKE
    placeholder for exactly one character) before the instances table is
    searched; the match with the lowest rowid is returned.
    """
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )

    row = fba.cursor.fetchone()
    return None if row is None else row[0]

def fetch_blocks(args: argparse.Namespace):
    """Re-check the block lists of registered instances.

    When args.domain is set, only that domain is checked, and only if it is a
    valid domain name, not blacklisted and already registered. Otherwise all
    instances of the selected software types whose last_blocked is NULL or
    older than the configured "recheck_block" interval are checked.

    pleroma, mastodon and gotosocial are delegated to their own modules.
    friendica and misskey block lists are processed here: obscured domain
    names are looked up in the instances table, unknown blocked domains are
    registered, and each block is either added or has its last-seen stamp and
    reason updated. New "reject" blocks are collected and, if the bot is
    enabled, posted once per blocker.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # New "reject" blocks of this blocker, reported to the bot below
        blockdict = list()
        blocker = fba.tidyup_domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software in ("friendica", "misskey"):
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                # Named block_levels (not json) so the json module is not shadowed
                if software == "friendica":
                    block_levels = fba.fetch_friendica_blocks(blocker)
                else:
                    block_levels = fba.fetch_misskey_blocks(blocker)

                print(f"INFO: Checking {len(block_levels)} entries from blocker='{blocker}',software='{software}' ...")
                for block_level, blocklist in block_levels.items():
                    # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                    block_level = fba.tidyup_domain(block_level)
                    # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                    if block_level == "":
                        print("WARNING: block_level is empty, blocker:", blocker)
                        continue

                    # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                    for block in blocklist:
                        # Relies on every entry holding exactly two values,
                        # domain first and reason second.
                        blocked, reason = block.values()
                        # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                        blocked = fba.tidyup_domain(blocked)
                        reason = fba.tidyup_reason(reason)
                        # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                        if blocked == "":
                            print("WARNING: blocked is empty:", blocker)
                            continue
                        elif blacklist.is_blacklisted(blocked):
                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                            continue

                        # Some servers obscure domains with '*', others with '?';
                        # '*' takes precedence when both occur.
                        wildcard = None
                        if blocked.count("*") > 0:
                            wildcard = "*"
                        elif blocked.count("?") > 0:
                            wildcard = "?"

                        if wildcard is not None:
                            found = _find_obscured_domain(blocked, wildcard)
                            if found is None:
                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                                continue

                            # Only the domain is taken over. The matched row's
                            # origin/nodeinfo_url must not overwrite the
                            # blocker's values for later entries.
                            blocked = found

                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                        if not validators.domain(blocked):
                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                            continue
                        elif not instances.is_registered(blocked):
                            # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                            # NOTE(review): nodeinfo_url here is the blocker's value from
                            # the instances row - confirm this is what instances.add() expects.
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)

                        if not blocks.is_instance_blocked(blocker, blocked, block_level):
                            blocks.add_instance(blocker, blocked, reason, block_level)

                            if block_level == "reject":
                                blockdict.append({
                                    "blocked": blocked,
                                    "reason" : reason
                                })
                        else:
                            # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                            blocks.update_last_seen(blocker, blocked, block_level)
                            blocks.update_reason(reason, blocker, blocked, block_level)

                # DEBUG: print("DEBUG: Committing changes ...")
                fba.connection.commit()
            except Exception as e:
                # Best effort: a failing blocker is reported and the next one is tried
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
        elif software == "gotosocial":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            gotosocial.fetch_blocks(blocker, origin, nodeinfo_url)
        else:
            # Also reached for 'bookwyrm' and 'takahe', which the queries select
            # but no branch above handles.
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            # NOTE(review): send_bot_post is not defined in this file; presumably it
            # comes in through "from fba.federation import *" - confirm.
            send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
258
def fetch_cs(args: argparse.Namespace):
    """Import the silenced and blocked instance lists published by chaos.social.

    Downloads /federation from meta.chaos.social, takes the table following
    the "silenced-instances" heading and the one following the
    "blocked-instances" heading, and passes each to fba.find_domains(). Every
    row found is crawled via fba.fetch_instances() when its domain is not yet
    registered, and recorded as a block by 'chaos.social' under the matching
    block level unless that block already exists.

    Any error while downloading or parsing the page is printed and the
    process exits with status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = {
        "silenced": list(),
        "blocked": list(),
    }

    try:
        doc = bs4.BeautifulSoup(
            fba.get_response("meta.chaos.social", "/federation", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
        domains["silenced"] = domains["silenced"] + fba.find_domains(silenced)
        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
        domains["blocked"] = domains["blocked"] + fba.find_domains(blocked)

    except Exception as e:
        # Exception (not BaseException) so that Ctrl-C and SystemExit are not
        # reported as a failed fetch.
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # Count the rows, not the dict keys: len(domains) is always 2 and would
    # make both the guard and the message meaningless.
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        for block_level, rows in domains.items():
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in rows:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not instances.is_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
307
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl the new domains listed in an FBA-specific RSS feed.

    Downloads args.feed and, for every item, takes the part of the item link
    after the first "=" as the domain. Domains that are blacklisted, already
    collected in this run or already registered are skipped. When at least
    one domain remains, the lock is acquired and fba.fetch_instances() is
    called once per domain.

    Any error while downloading or parsing the feed is printed and the
    process exits with status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = list()

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.get_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
            rss = atoma.parse_rss_bytes(response.content)

            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
            for item in rss.items:
                # DEBUG: print(f"DEBUG: item={item}")
                # A single malformed item must not abort the whole import
                if not item.link or "=" not in item.link:
                    print(f"WARNING: item.link='{item.link}' does not contain a domain - SKIPPED!")
                    continue

                domain = item.link.split("=")[1]

                if domain == "":
                    print(f"WARNING: item.link='{item.link}' does not contain a domain - SKIPPED!")
                    continue
                elif blacklist.is_blacklisted(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                    continue
                elif domain in domains:
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                    continue
                elif instances.is_registered(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                    continue

                # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
                domains.append(domain)

    except Exception as e:
        # The feed URL lives in args.feed; there is no local named "feed" in
        # this function, so referencing it here raised NameError.
        print(f"ERROR: Cannot fetch feed='{args.feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
353
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl the new domains mentioned in the FBA bot account's ATOM feed.

    Downloads the feed, parses the HTML content of every entry and treats
    each comma-separated part of every link's href as a domain candidate
    (after fba.tidyup_domain()). Candidates that are empty, blacklisted,
    already collected in this run or already registered are skipped. When at
    least one domain remains, the lock is acquired and fba.fetch_instances()
    is called once per domain.

    Any error while downloading or parsing the feed is printed and the
    process exits with status 255.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()
    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = fba.get_url(feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing ATOM feed ...")
            atom = atoma.parse_atom_bytes(response.content)

            # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
            for entry in atom.entries:
                # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
                doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
                # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
                for element in doc.findAll("a"):
                    # An anchor without href would raise KeyError on element["href"]
                    # and abort the whole import, so it is skipped instead.
                    hrefs = element.get("href")
                    if hrefs is None:
                        continue

                    for href in hrefs.split(","):
                        # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
                        domain = fba.tidyup_domain(href)

                        # DEBUG: print(f"DEBUG: domain='{domain}'")
                        if domain == "":
                            # Nothing left after tidying up - not a crawlable domain
                            continue
                        elif blacklist.is_blacklisted(domain):
                            # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                            continue
                        elif domain in domains:
                            # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                            continue
                        elif instances.is_registered(domain):
                            # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                            continue

                        # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
                        domains.append(domain)

    except Exception as e:
        # Exception (not BaseException) so that Ctrl-C and SystemExit are not
        # reported as a failed fetch.
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
406
def fetch_instances(args: argparse.Namespace):
    """Crawl args.domain and, unless args.single is set, other due instances.

    After acquiring the lock, args.domain is always fetched first. Without
    args.single, every instance of the selected software types whose
    last_instance_fetch is NULL or older than the configured
    "recheck_instance" interval is fetched as well, newest rowid first;
    blacklisted domains are skipped with a warning.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Name of this command, handed to every fba.fetch_instances() call below
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        # DEBUG: print(f"DEBUG: Not fetching more instances - EXIT!")
        return

    # Loop through some instances
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    candidates = fba.cursor.fetchall()
    print(f"INFO: Checking {len(candidates)} entries ...")
    for domain, origin, software, nodeinfo_url in candidates:
        # DEBUG: print("DEBUG: domain:", domain)
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")