# fba/commands.py - command implementations for the Fedi API Block aggregator
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import time
21
22 import argparse
23 import atoma
24 import bs4
25 import markdown
26 import reqto
27 import validators
28
29 from fba import fba
30
31 from fba.helpers import blacklist
32 from fba.helpers import config
33 from fba.helpers import cookies
34 from fba.helpers import locking
35 from fba.helpers import tidyup
36
37 from fba.http import federation
38 from fba.http import network
39
40 from fba.models import blocks
41 from fba.models import instances
42
43 from fba.networks import friendica
44 from fba.networks import lemmy
45 from fba.networks import mastodon
46 from fba.networks import misskey
47 from fba.networks import pleroma
48
def check_instance(args: argparse.Namespace) -> int:
    """Validate args.domain and report whether it can be added.

    Returns 0 when the domain is unknown (fine to add), 100 when it is not a
    valid domain name, 101 when it is blacklisted and 102 when it is already
    registered.
    """
    domain = args.domain

    if not validators.domain(domain):
        print(f"WARNING: args.domain='{domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(domain):
        print(f"WARNING: args.domain='{domain}' is blacklisted")
        return 101

    if instances.is_registered(domain):
        print(f"WARNING: args.domain='{domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{domain}' is not known")
    return 0
66
def fetch_bkali(args: argparse.Namespace) -> int:
    """Fetch the domain list from gql.api.bka.li via GraphQL and add new instances.

    Returns 0 on success, 100/101 when the API reported an error and 102 when a
    network exception occurred.
    """
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        if "error_message" in fetched:
            print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            # BUG FIX: the error object lives under fetched["json"] (as the
            # condition above shows); fetched['error'] would raise KeyError.
            print(f"WARNING: post_json_api() returned error: {fetched['json']['error']['message']}")
            return 101

        rows = fetched["json"]

        # Sanity-check the GraphQL response structure before iterating.
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                continue
            elif instances.is_registered(entry["domain"]):
                continue
            elif instances.is_recent(entry["domain"]):
                # Recently fetched - no need to re-crawl yet
                continue

            domains.append(entry["domain"])

    except network.exceptions as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
        return 102

    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                cookies.clear(domain)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
                instances.set_last_error(domain, exception)

    return 0
136
def fetch_blocks(args: argparse.Namespace):
    """Fetch block lists from registered instances and store them in the database.

    With --domain only that single instance is re-checked, with --software all
    instances running that software; otherwise every supported instance whose
    last_blocked timestamp is older than the "recheck_block" interval.
    """
    if args.domain is not None and args.domain != "":
        # Single-domain mode: validate the argument before touching the database.
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # Collects "reject"-level blocks for the optional bot posting below.
        blockdict = list()
        blocker = tidyup.domain(blocker)

        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            # Without a nodeinfo URL the software-specific fetchers cannot run.
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # Record the check time even if fetching fails further down.
        instances.set_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "lemmy":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")

            # NOTE(review): the friendica/misskey fetchers appear to return a
            # dict of block_level -> list of block entries (see .items() below)
            # - confirm against fba.networks.friendica/misskey.
            blocking = list()
            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                block_level = tidyup.domain(block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for block in blocklist:
                    # Each entry carries the blocked domain and an optional reason.
                    blocked, reason = block.values()
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        continue
                    elif blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without hash
                        row = instances.deobscure("*", blocked)

                        if row is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        # Continue with the deobscured domain/origin/nodeinfo_url.
                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]
                    elif blocked.count("?") > 0:
                        # Some obscure them with question marks, not sure if that's dependent on version or not
                        row = instances.deobscure("?", blocked)

                        if row is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]

                    if not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!")
                        continue
                    elif blocked.endswith(".arpa"):
                        # Reverse-DNS pseudo domains are skipped entirely.
                        continue
                    elif not instances.is_registered(blocked):
                        # Not yet known: register it with this blocker as origin.
                        try:
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                        except network.exceptions as exception:
                            print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'")
                            continue

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        # Block already recorded: refresh last-seen time and reason.
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

                    cookies.clear(blocked)

            # Persist all blocks collected for this blocker.
            fba.connection.commit()
        else:
            print("WARNING: Unknown software:", blocker, software)

        if instances.has_pending(blocker):
            instances.update_data(blocker)

        # Optionally announce newly found "reject" blocks via the bot account.
        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

        cookies.clear(blocker)
305
def fetch_observer(args: argparse.Namespace):
    """Scrape fediverse.observer's per-software table data and register unknown domains."""
    types = [
        "akoma", "birdsitelive", "bookwyrm", "calckey", "diaspora", "foundkey",
        "friendica", "funkwhale", "gancio", "gnusocial", "gotosocial",
        "hometown", "hubzilla", "kbin", "ktistec", "lemmy", "mastodon",
        "microblogpub", "misskey", "mitra", "mobilizon", "owncast", "peertube",
        "pixelfed", "pleroma", "plume", "snac", "takahe", "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    print(f"INFO: Fetching {len(types)} different table data ...")
    for software in types:
        doc = None

        try:
            raw = fba.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
            doc = bs4.BeautifulSoup(raw, features='html.parser')
        except network.exceptions as exception:
            print(f"WARNING: Cannot fetch software='{software}' from fediverse.observer: '{type(exception)}'")
            continue

        items = doc.findAll("a", {"class": "url"})
        print(f"INFO: Checking {len(items)} items,software='{software}' ...")
        for anchor in items:
            # The anchor's inner HTML is the instance domain.
            domain = anchor.decode_contents()

            if not validators.domain(domain.split("/")[0]):
                print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
                continue
            if blacklist.is_blacklisted(domain) or instances.is_registered(domain):
                # Either unwanted or already known - nothing to do.
                continue

            print(f"INFO: Fetching instances for domain='{domain}',software='{software}'")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

            cookies.clear(domain)
382
def fetch_cs(args: argparse.Namespace):
    """Fetch chaos.social's federation.md block list and store its blocks.

    Parses the Markdown document, extracts the "silenced" and "blocked"
    tables and records them under the block levels "silenced" and "reject".
    """
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    domains["reject"] = domains["reject"] + federation.find_domains(blocked)

    # BUG FIX: len(domains) counted the dict's two keys and was therefore
    # always 2; count the actually extracted rows instead.
    total = sum(len(rows) for rows in domains.values())
    if total > 0:
        locking.acquire()

        print(f"INFO: Adding {total} new instances ...")
        for block_level in domains:
            for row in domains[block_level]:
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    try:
                        print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

                        cookies.clear(row["domain"])
                    except network.exceptions as exception:
                        print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
                        instances.set_last_error(row["domain"], exception)

        fba.connection.commit()
453
def fetch_fba_rss(args: argparse.Namespace):
    """Parse the FBA-specific RSS feed given via args.feed and add new domains."""
    domains = list()

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        rss = atoma.parse_rss_bytes(response.content)

        for item in rss.items:
            # Item links look like "...?domain=<domain>" - take the value part.
            candidate = item.link.split("=")[1]

            skip = (
                blacklist.is_blacklisted(candidate)
                or candidate in domains
                or instances.is_registered(candidate)
            )
            if not skip:
                domains.append(candidate)

    if domains:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                cookies.clear(domain)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
                instances.set_last_error(domain, exception)
501
def fetch_fbabot_atom(args: argparse.Namespace):
    """Parse the FBA bot's ATOM feed on ryona.agency and add new domains."""
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        atom = atoma.parse_atom_bytes(response.content)

        for entry in atom.entries:
            # Each entry's HTML content lists domains as anchor elements,
            # possibly several comma-separated per href attribute.
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    domain = tidyup.domain(href)

                    known = (
                        blacklist.is_blacklisted(domain)
                        or domain in domains
                        or instances.is_registered(domain)
                    )
                    if not known:
                        domains.append(domain)

    if domains:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                cookies.clear(domain)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
                instances.set_last_error(domain, exception)
557
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch instances from args.domain, then re-crawl stale known instances.

    Returns 100 when the initial fetch fails, otherwise 0. With --single only
    the given domain is fetched.
    """
    locking.acquire()

    # Initial fetch of the requested domain
    try:
        print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

        cookies.clear(args.domain)
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
        instances.set_last_error(args.domain, exception)

        return 100

    if args.single:
        return 0

    # Loop through instances not fetched within the recheck interval
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        try:
            print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
            federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name, nodeinfo_url)

            cookies.clear(domain)
        except network.exceptions as exception:
            print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{domain}'")
            instances.set_last_error(domain, exception)

    return 0
604
def fetch_oliphant(args: argparse.Namespace):
    """Fetch oliphant's CSV block lists from codeberg.org and process each domain.

    When args.domain is given, only the block list whose blocker matches it is
    fetched.
    """
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            continue
        elif args.domain in domains:
            # Already handled while processing an earlier list
            continue

        # Fetch this URL
        print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
        response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.content != "":
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            for row in reader:
                # Some lists name the column "#domain", others "domain"
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    continue

                # Mark as handled, then hand over to generic processing.
                # (The previously unused 'processed' result variable was removed.)
                domains.append(domain)
                fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
692
def fetch_txt(args: argparse.Namespace):
    """Fetch static plain-text block lists (currently seirdy.one) and process them."""
    locking.acquire()

    # Static URLs, one blocked domain per line in each file
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    print(f"INFO: Checking {len(urls)} text file(s) ...")
    for url in urls:
        response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.text != "":
            domains = response.text.split("\n")

            print(f"INFO: Processing {len(domains)} domains ...")
            for domain in domains:
                # Skip empty lines (e.g. the trailing newline)
                if domain == "":
                    continue

                # Removed dead code: the old "if not processed: continue" was a
                # no-op because 'continue' was the last statement of the loop.
                fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)