]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Fixed some issues found by pylint:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import argparse
18 import atoma
19 import bs4
20 import csv
21 import inspect
22 import itertools
23 import json
24 import re
25 import reqto
26 import sys
27 import time
28 import validators
29
30 from fba import blacklist
31 from fba import blocks
32 from fba import boot
33 from fba import config
34 from fba import fba
35 from fba import instances
36 from fba import network
37
38 from fba.federation import *
39
def check_instance(args: argparse.Namespace) -> int:
    """Report whether args.domain could be added as a new instance.

    One line describing the outcome is printed and a status code is returned:

    * 0   - the domain is not known yet
    * 100 - the domain is not a valid domain name
    * 101 - the domain is blacklisted
    * 102 - the domain is already registered
    """
    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    # None of the checks above matched
    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
57
def fetch_bkali(args: argparse.Namespace):
    """Collect unknown domains from the gql.api.bka.li GraphQL API and crawl them.

    The "nodeinfo" list of the reply is filtered down to entries carrying a
    valid, non-blacklisted and not yet registered domain. When at least one
    domain remains, the lock is acquired and fba.fetch_instances() is invoked
    for each of them.

    Anything raised while fetching or filtering is reported and ends the
    process with exit code 255. BaseException is caught, so this includes
    KeyboardInterrupt.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = []
    try:
        query = json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        })
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", query)

        # An unusable reply is raised here and reported by the handler below
        if len(fetched) == 0:
            raise Exception("WARNING: Returned no records")
        if "data" not in fetched:
            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
        if "nodeinfo" not in fetched["data"]:
            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")

        for entry in fetched["data"]["nodeinfo"]:
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue

            if not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue

            # Blacklisted or already registered domains are skipped silently
            if blacklist.is_blacklisted(entry["domain"]) or instances.is_registered(entry["domain"]):
                continue

            domains.append(entry["domain"])

    except BaseException as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    if not domains:
        # DEBUG: print("DEBUG: Nothing to add - EXIT!")
        return

    boot.acquire_lock()

    # Name of this function, handed to fba.fetch_instances() with every call
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(domains)} new instances ...")
    for domain in domains:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, command)

    # DEBUG: print("DEBUG: EXIT!")
106
def _find_obfuscated_instance(blocked: str):
    """Look up the registered instance matching an obscured domain.

    The obscuring character ('*' if present, otherwise '?') is replaced by
    the SQL LIKE single-character wildcard '_' and the instances table is
    searched for the first match ordered by rowid.

    Returns the row (domain, origin, nodeinfo_url) or None when nothing
    matches.
    """
    wildcard = "*" if "*" in blocked else "?"
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace(wildcard, "_")]
    )

    return fba.cursor.fetchone()

def _store_blocks(blocker: str, software: str, nodeinfo_url, records, command: str, blockdict: list):
    """Store the block lists fetched from one friendica/misskey blocker.

    Parameters:
        blocker      - domain of the instance that publishes the blocks
        software     - software name of the blocker, only used in messages
        nodeinfo_url - nodeinfo URL from the blocker's row; handed to
                       instances.add() for blocked domains that are not
                       registered yet
        records      - mapping of block level to a list of block entries;
                       each entry is unpacked as (blocked, reason) from its
                       values()
        command      - name of the calling command, recorded by instances.add()
        blockdict    - list that receives every newly added "reject" block;
                       it is filled in place so entries collected before an
                       exception are still available to the caller
    """
    for block_level, blocklist in records.items():
        # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
        block_level = fba.tidyup_domain(block_level)
        if block_level == "":
            print("WARNING: block_level is empty, blocker:", blocker)
            continue

        for block in blocklist:
            blocked, reason = block.values()
            blocked = fba.tidyup_domain(blocked)
            reason = fba.tidyup_reason(reason) if reason is not None and reason != "" else None

            # Per-entry value: a de-obfuscated match must not leak its
            # nodeinfo_url into the entries that follow it
            entry_nodeinfo_url = nodeinfo_url

            if blocked == "":
                print("WARNING: blocked is empty:", blocker)
                continue
            elif blacklist.is_blacklisted(blocked):
                # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                continue
            elif "*" in blocked or "?" in blocked:
                # Some servers obscure domains with '*', others with '?'
                found = _find_obfuscated_instance(blocked)
                if found is None:
                    print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                    continue

                blocked = found[0]
                entry_nodeinfo_url = found[2]

            # Validated once, after a possible de-obfuscation
            if not validators.domain(blocked):
                print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
                continue
            elif not instances.is_registered(blocked):
                # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                instances.add(blocked, blocker, command, entry_nodeinfo_url)

            if not blocks.is_instance_blocked(blocker, blocked, block_level):
                blocks.add_instance(blocker, blocked, reason, block_level)

                if block_level == "reject":
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : reason
                    })
            else:
                # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                blocks.update_last_seen(blocker, blocked, block_level)
                blocks.update_reason(reason, blocker, blocked, block_level)

def fetch_blocks(args: argparse.Namespace):
    """Re-fetch the block lists of registered instances and store them.

    With a non-empty args.domain only that domain is checked; it must be a
    valid domain name, not blacklisted and already registered, otherwise a
    warning is printed and nothing happens. Without it, every instance of a
    listed software whose last_blocked is NULL or older than now minus the
    "recheck_block" configuration value is checked.

    pleroma and mastodon blockers are delegated to their modules. friendica
    and misskey block lists are stored here and committed per blocker; an
    exception raised while doing so is printed and the next blocker is
    processed. Any other software selected by the query only produces an
    "Unknown software" warning.

    When the "bot_enabled" configuration value is set, newly added "reject"
    blocks of a blocker are handed to network.send_bot_post().
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    single_domain = args.domain is not None and args.domain != ""

    if single_domain:
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    boot.acquire_lock()

    if single_domain:
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        blockdict = []
        blocker = fba.tidyup_domain(blocker)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        instances.update_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software in ("friendica", "misskey"):
            print(f"INFO: blocker='{blocker}',software='{software}'")
            try:
                # Named "records" so the imported json module is not shadowed
                if software == "friendica":
                    records = friendica.fetch_blocks(blocker)
                else:
                    records = misskey.fetch_blocks(blocker)

                print(f"INFO: Checking {len(records)} entries from blocker='{blocker}',software='{software}' ...")
                _store_blocks(blocker, software, nodeinfo_url, records, command, blockdict)

                # DEBUG: print("DEBUG: Committing changes ...")
                fba.connection.commit()
            except Exception as exception:
                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(exception)}]:'{str(exception)}'")
        else:
            print("WARNING: Unknown software:", blocker, software)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

    # DEBUG: print("DEBUG: EXIT!")
257
def fetch_cs(args: argparse.Namespace):
    """Import the block lists published on meta.chaos.social/federation.

    The tables following the headings with the ids "silenced-instances" and
    "blocked-instances" are parsed with fba.find_domains() and stored under
    the block levels "silenced" and "reject". Every row that is not yet
    recorded as a block of 'chaos.social' is added, and instances are fetched
    from every row's domain that is not registered yet. The changes are
    committed at the end.

    Nothing is locked, added or committed when both tables yield no rows.

    Anything raised while fetching or parsing the page is reported and ends
    the process with exit code 255. BaseException is caught, so this includes
    KeyboardInterrupt.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = {
        "silenced": [],
        "reject"  : [],
    }

    try:
        doc = bs4.BeautifulSoup(
            network.fetch_response("meta.chaos.social", "/federation", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
        domains["silenced"].extend(fba.find_domains(silenced))
        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")

        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
        domains["reject"].extend(fba.find_domains(blocked))

    except BaseException as exception:
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # Count the fetched rows; len(domains) would only count the two block
    # levels and therefore always be 2
    total = sum(len(rows) for rows in domains.values())
    # DEBUG: print(f"DEBUG: total={total}")
    if total == 0:
        # DEBUG: print("DEBUG: Nothing to add - EXIT!")
        return

    boot.acquire_lock()
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {total} new instances ...")
    for block_level, rows in domains.items():
        # DEBUG: print(f"DEBUG: block_level='{block_level}'")

        for row in rows:
            # DEBUG: print(f"DEBUG: row='{row}'")
            if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

            if not instances.is_registered(row["domain"]):
                print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                fba.fetch_instances(row["domain"], 'chaos.social', None, command)

    # DEBUG: print("DEBUG: Committing changes ...")
    fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
306
def fetch_fba_rss(args: argparse.Namespace):
    """Crawl the domains announced by the FBA-specific RSS feed args.feed.

    For every feed item the domain is taken from the item's link (the part
    after the first "="). Blacklisted, duplicate and already registered
    domains are skipped. When at least one domain remains, the lock is
    acquired and fba.fetch_instances() is invoked for each of them.

    A response that is not OK, has a status code of 300 or above, or has an
    empty body is ignored silently.

    Anything raised while fetching or parsing the feed is reported and ends
    the process with exit code 255. BaseException is caught, so this includes
    KeyboardInterrupt.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = []

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = fba.fetch_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
            rss = atoma.parse_rss_bytes(response.content)

            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
            for item in rss.items:
                # DEBUG: print(f"DEBUG: item={item}")
                domain = item.link.split("=")[1]

                if blacklist.is_blacklisted(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
                    continue
                elif domain in domains:
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
                    continue
                elif instances.is_registered(domain):
                    # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                    continue

                # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
                domains.append(domain)

    except BaseException as exception:
        # The feed URL lives in args.feed; there is no local "feed" variable
        # in this function, so referencing one here would raise NameError
        print(f"ERROR: Cannot fetch feed='{args.feed}',exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            print(f"INFO: Fetching instances from domain='{domain}' ...")
            fba.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    # DEBUG: print("DEBUG: EXIT!")
352
def fetch_fbabot_atom(args: argparse.Namespace):
    """Crawl the domains linked in the ATOM feed of the FBA bot account.

    The content of every feed entry is parsed as HTML. The "href" of each
    anchor is split at "," and every part is passed through
    fba.tidyup_domain(). Blacklisted, duplicate and already registered
    domains are skipped. When at least one domain remains, the lock is
    acquired and fba.fetch_instances() is invoked for each of them.

    A response that is not OK, has a status code of 300 or above, or has an
    empty body is ignored silently.

    Anything raised while fetching or parsing the feed is reported and ends
    the process with exit code 255. BaseException is caught, so this includes
    KeyboardInterrupt.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"
    domains = []

    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        timeout = (config.get("connection_timeout"), config.get("read_timeout"))
        response = fba.fetch_url(feed, fba.headers, timeout)

        # Only a successful, non-empty response is parsed
        usable = response.ok and response.status_code < 300 and len(response.text) > 0
        entries = atoma.parse_atom_bytes(response.content).entries if usable else []

        for entry in entries:
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")

            # Lazily walk every comma-separated part of every anchor's href
            hrefs = (
                href
                for element in doc.findAll("a")
                for href in element["href"].split(",")
            )

            for href in hrefs:
                domain = fba.tidyup_domain(href)

                # Blacklisted, duplicate or registered domains are skipped silently
                if blacklist.is_blacklisted(domain) or domain in domains or instances.is_registered(domain):
                    continue

                domains.append(domain)

    except BaseException as exception:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(exception)}]:'{str(exception)}'")
        sys.exit(255)

    if not domains:
        # DEBUG: print("DEBUG: Nothing to add - EXIT!")
        return

    boot.acquire_lock()

    # Name of this function, handed to fba.fetch_instances() with every call
    command = inspect.currentframe().f_code.co_name

    print(f"INFO: Adding {len(domains)} new instances ...")
    for domain in domains:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, command)

    # DEBUG: print("DEBUG: EXIT!")
405
def fetch_instances(args: argparse.Namespace):
    """Fetch instances from args.domain and then from instances due a re-check.

    The lock is acquired and fba.fetch_instances() is run for args.domain.
    Unless args.single is set, the same is then done for every instance of a
    listed software whose last_instance_fetch is NULL or older than now minus
    the "recheck_instance" configuration value. Blacklisted domains are
    skipped with a warning.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Name of this function, handed to fba.fetch_instances() with every call
    command = inspect.currentframe().f_code.co_name

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, command)

    if args.single:
        # DEBUG: print(f"DEBUG: Not fetching more instances - EXIT!")
        return

    # Instances fetched before this point in time are due again
    threshold = time.time() - config.get("recheck_instance")
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [threshold]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, command, nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")
434
def fetch_federater(args: argparse.Namespace):
    """Crawl the domains listed in the federater "blocks_recommended" CSV.

    The lock is acquired and federater.csv is downloaded from GitHub. For
    every CSV row the "#domain" column is checked: invalid and blacklisted
    domains are skipped with a warning, already registered ones silently.
    fba.fetch_instances() is invoked for each remaining domain.

    Nothing is parsed when the response is not OK or its body is empty.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Fetch this URL
    response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))
    # DEBUG: print(f"DEBUG: response[]='{type(response)}'")

    # response.content is decoded below, so it is bytes; comparing it with the
    # str "" is always unequal, hence the length is checked instead
    if response.ok and len(response.content) > 0:
        # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
        command = inspect.currentframe().f_code.co_name

        # DEBUG: print(f"DEBUG: reader[]={type(reader)}")
        for row in reader:
            domain = row["#domain"]

            if not validators.domain(domain):
                print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
                continue
            elif blacklist.is_blacklisted(domain):
                print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
                continue
            elif instances.is_registered(domain):
                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - skipped!")
                continue

            print(f"INFO: Fetching instances for instane='{domain}' ...")
            fba.fetch_instances(domain, None, None, command)

    # DEBUG: print("DEBUG: EXIT!")