# Source: fba/commands.py (fba.git, git.mxchange.org)
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
import argparse
import csv
import inspect
import json
import time

import atoma
import bs4
import markdown
import reqto
import validators

from fba import blacklist
from fba import config
from fba import fba
from fba import federation
from fba import network

from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
45
def check_instance(args: argparse.Namespace) -> int:
    """Check whether args.domain could be newly registered.

    Returns 0 when the domain is unknown (and thus fetchable), or a
    non-zero status code describing why it cannot be added:
    100 - not a syntactically valid domain name
    101 - blacklisted
    102 - already registered
    """
    # Guard clauses: report the first disqualifying condition and bail out.
    if not validators.domain(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
        return 101

    if instances.is_registered(args.domain):
        print(f"WARNING: args.domain='{args.domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{args.domain}' is not known")
    return 0
63
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the domain list from the gql.api.bka.li GraphQL API and run
    an instance fetch for every new, valid, non-blacklisted domain.

    Returns 0 on success, or a non-zero status code:
    100 - API returned an error message
    101 - API returned a GraphQL error object
    102 - network exception while talking to the API
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
        if "error_message" in fetched:
            print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            # BUGFIX: the error object lives under fetched["json"], not under
            # fetched itself - the old code raised KeyError here instead of
            # printing the GraphQL error message.
            print(f"WARNING: post_json_api() returned error: {fetched['json']['error']['message']}")
            return 101

        rows = fetched["json"]

        # Sanity-check the GraphQL response shape before iterating.
        # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
                continue
            elif instances.is_recent(entry["domain"]):
                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' has been recently fetched - SKIPPED!")
                continue

            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except network.exceptions as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
        return 102

    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...")
                cookies.clear(domain)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
                instances.set_last_error(domain, exception)

    # DEBUG: print("DEBUG: Success - EXIT!")
    return 0
133
def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from known blocker instances and store them.

    When args.domain is set, only that single (already registered) domain
    is re-checked; otherwise all supported instances whose last block
    fetch is older than the 'recheck_block' interval are processed.
    Friendica/Misskey block lists are parsed inline here; Pleroma and
    Mastodon delegate to their network-specific modules.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    if args.domain is not None and args.domain != "":
        # Single-domain mode: validate the requested blocker before touching the DB.
        # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...")
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
        # blockdict collects "reject"-level blocks for the optional bot post below.
        blockdict = list()
        blocker = tidyup.domain(blocker)
        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)

        # Skip rows that became unusable after tidying or are blacklisted by now.
        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            print(f"DEBUG: blocker='{blocker}',software='{software}' has empty nodeinfo_url")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
        # Record the fetch attempt timestamp before doing any network work.
        instances.set_last_blocked(blocker)

        # Dispatch on the blocker's software type.
        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "lemmy":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            # Lemmy support not implemented yet.
            #lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")

            # blocking maps block_level -> list of {"blocked": ..., "reason": ...} dicts.
            blocking = list()
            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
                block_level = tidyup.domain(block_level)
                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                for block in blocklist:
                    blocked, reason = block.values()
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                    # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                        continue
                    elif blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without hash
                        # Try to resolve the obscured name back to a known instance.
                        row = instances.deobscure("*", blocked)

                        # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
                        if row is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]
                    elif blocked.count("?") > 0:
                        # Some obscure them with question marks, not sure if that's dependent on version or not
                        row = instances.deobscure("?", blocked)

                        # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
                        if row is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]

                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                    if not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!")
                        continue
                    elif blocked.endswith(".arpa"):
                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is ending with '.arpa' - SKIPPED!")
                        continue
                    elif not instances.is_registered(blocked):
                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
                        try:
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                        except network.exceptions as exception:
                            print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'")
                            continue

                    # Insert a new block record, or refresh an existing one.
                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

                    # DEBUG: print(f"DEBUG: Invoking cookies.clear({blocked}) ...")
                    cookies.clear(blocked)

            # DEBUG: print("DEBUG: Committing changes ...")
            # NOTE(review): only this friendica/misskey branch commits here; the
            # pleroma/mastodon branches presumably commit in their own modules - confirm.
            fba.connection.commit()
        else:
            print("WARNING: Unknown software:", blocker, software)

        if instances.has_pending(blocker):
            # DEBUG: print(f"DEBUG: Invoking instances.update_data({blocker}) ...")
            instances.update_data(blocker)

        # Optionally announce newly found "reject" blocks via the bot account.
        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

        # DEBUG: print(f"DEBUG: Invoking cookies.clear({blocker}) ...")
        cookies.clear(blocker)

    # DEBUG: print("DEBUG: EXIT!")
296
def fetch_observer(args: argparse.Namespace):
    """Crawl fediverse.observer's per-software table data and fetch
    instances for every new, valid, non-blacklisted domain found there."""
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    types = [
        "akoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    print(f"INFO: Fetching {len(types)} different table data ...")
    for software in types:
        # Fetch and parse the table for this software type; skip it on
        # any network failure instead of aborting the whole run.
        try:
            raw = fba.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            doc = bs4.BeautifulSoup(raw, features='html.parser')
        except network.exceptions as exception:
            print(f"WARNING: Cannot fetch software='{software}' from fediverse.observer: '{type(exception)}'")
            continue

        items = doc.findAll("a", {"class": "url"})
        print(f"INFO: Checking {len(items)} items,software='{software}' ...")
        for item in items:
            # The anchor's contents hold the instance domain.
            domain = item.decode_contents()

            # Guard clauses: skip invalid, blacklisted or known domains.
            if not validators.domain(domain.split("/")[0]):
                print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
                continue
            if blacklist.is_blacklisted(domain):
                continue
            if instances.is_registered(domain):
                continue

            print(f"INFO: Fetching instances for domain='{domain}',software='{software}'")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            cookies.clear(domain)
373
def fetch_cs(args: argparse.Namespace):
    """Import chaos.social's published block list (federation.md on GitHub).

    The markdown document contains two tables - 'silenced' and 'blocked'
    instances - which are converted into block records for blocker
    'chaos.social' and queued for instance fetching.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    # Markdown extensions needed to render the document's tables/markup.
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    # Maps block level -> list of {"domain": ..., "reason": ...} rows.
    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'")

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'")
    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: silenced[]='{type(silenced)}'")
    domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    # DEBUG: print(f"DEBUG: blocked[]='{type(blocked)}'")
    domains["reject"] = domains["reject"] + federation.find_domains(blocked)

    # BUGFIX: len(domains) was always 2 (the dict has two fixed keys), so the
    # guard never skipped and the INFO line always reported "2". Count the
    # actual number of parsed rows instead.
    total = len(domains["silenced"]) + len(domains["reject"])

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in domains[block_level]:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    try:
                        print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

                        # DEBUG: print(f"DEBUG: Invoking cookies.clear({row['domain']}) ...")
                        cookies.clear(row["domain"])
                    except network.exceptions as exception:
                        print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
                        instances.set_last_error(row["domain"], exception)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")
444
def fetch_fba_rss(args: argparse.Namespace):
    """Collect instance domains from an FBA-specific RSS feed (args.feed)
    and fetch instances for every new, non-blacklisted domain."""
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    domains = []

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        rss = atoma.parse_rss_bytes(response.content)

        for item in rss.items:
            # The domain is encoded as a query parameter in the item link.
            domain = item.link.split("=")[1]

            # Guard clauses: skip blacklisted, duplicate or known domains.
            if blacklist.is_blacklisted(domain):
                continue
            if domain in domains:
                continue
            if instances.is_registered(domain):
                continue

            domains.append(domain)

    if domains:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
                cookies.clear(domain)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
                instances.set_last_error(domain, exception)
492
def fetch_fbabot_atom(args: argparse.Namespace):
    """Collect instance domains from the FBA bot account's ATOM feed and
    fetch instances for every new, non-blacklisted domain."""
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = []

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        atom = atoma.parse_atom_bytes(response.content)

        for entry in atom.entries:
            # Each entry's HTML content carries anchors whose href lists
            # one or more comma-separated domains.
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    domain = tidyup.domain(href)

                    # Guard clauses: skip blacklisted, duplicate or known domains.
                    if blacklist.is_blacklisted(domain):
                        continue
                    if domain in domains:
                        continue
                    if instances.is_registered(domain):
                        continue

                    domains.append(domain)

    if domains:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
                cookies.clear(domain)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
                instances.set_last_error(domain, exception)
548
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch instances starting from args.domain, then (unless
    args.single is set) re-fetch all stale known instances.

    Returns 0 on success, 100 when the initial fetch fails.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Initial fetch of the requested domain.
    try:
        print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
        cookies.clear(args.domain)
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
        instances.set_last_error(args.domain, exception)
        return 100

    if args.single:
        # Caller asked for just the one domain.
        return 0

    # Re-fetch all supported instances whose last fetch is older than the
    # 'recheck_instance' interval.
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        try:
            print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
            federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name, nodeinfo_url)
            cookies.clear(domain)
        except network.exceptions as exception:
            print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{domain}'")
            instances.set_last_error(domain, exception)

    return 0
595
def fetch_oliphant(args: argparse.Namespace):
    """Import block lists from the oliphant blocklists repository on
    Codeberg (one CSV per blocker) and process every listed domain.

    When args.domain is set, only the matching blocker's list is fetched.
    """
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    # Domains already handled during this run.
    domains = []
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            continue
        if args.domain in domains:
            continue

        # Fetch this URL
        print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
        response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if not response.ok or response.content == "":
            continue

        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

        for row in reader:
            # Lists use either '#domain' or 'domain' as the column header.
            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                continue

            # Mark as handled, then run the generic domain processing.
            domains.append(domain)
            fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
683
def fetch_txt(args: argparse.Namespace):
    """Import plain-text block lists (one domain per line) from a static
    set of URLs and process every non-empty domain."""
    # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Static URLs
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    print(f"INFO: Checking {len(urls)} text file(s) ...")
    for url in urls:
        response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if not response.ok or response.text == "":
            continue

        # One domain per line; blank lines are skipped.
        domains = response.text.split("\n")

        print(f"INFO: Processing {len(domains)} domains ...")
        for domain in domains:
            if domain == "":
                continue

            fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)