]> git.mxchange.org Git - fba.git/blob - fba/commands.py
Continued:
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
import argparse
import csv
import inspect
import json
import time

import atoma
import bs4
import markdown
import reqto
import validators

from fba import blacklist
from fba import config
from fba import fba
from fba import federation
from fba import network

from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma
45
def check_instance(args: argparse.Namespace) -> int:
    """Check whether args.domain can be added to the instance database.

    Returns 0 when the domain is unknown (and therefore usable), 100 when
    it is syntactically invalid, 101 when it is blacklisted and 102 when
    it is already registered.
    """
    domain = args.domain

    # Guard clauses: report the first failing check and bail out.
    if not validators.domain(domain):
        print(f"WARNING: args.domain='{domain}' is not valid")
        return 100

    if blacklist.is_blacklisted(domain):
        print(f"WARNING: args.domain='{domain}' is blacklisted")
        return 101

    if instances.is_registered(domain):
        print(f"WARNING: args.domain='{domain}' is already registered")
        return 102

    print(f"INFO: args.domain='{domain}' is not known")
    return 0
63
def fetch_bkali(args: argparse.Namespace) -> int:
    """Query the gql.api.bka.li GraphQL API for its domain list and fetch
    instances for every new, valid and non-blacklisted domain.

    Returns 0 on success, 100/101 on API-level errors and 102 when the
    GraphQL request itself raised a network exception.
    """
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        if "error_message" in fetched:
            # Transport-level error reported by post_json_api() itself
            print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            # BUGFIX: the error object is nested under fetched["json"]; the
            # previous code indexed fetched["error"] which raised KeyError.
            print(f"WARNING: post_json_api() returned error: {fetched['json']['error']['message']}")
            return 101

        rows = fetched["json"]

        # Sanity-check the GraphQL response structure before iterating.
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            # Skip malformed, invalid, blacklisted, known or recently seen domains.
            if "domain" not in entry:
                print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                continue
            elif instances.is_registered(entry["domain"]):
                continue
            elif instances.is_recent(entry["domain"]):
                continue

            domains.append(entry["domain"])

    except network.exceptions as exception:
        print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
        return 102

    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                cookies.clear(domain)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
                instances.set_last_error(domain, exception)

    return 0
133
def fetch_blocks(args: argparse.Namespace):
    """Fetch and persist block lists from registered instances.

    Operates on a single instance (args.domain), on all instances of one
    software (args.software), or - by default - on every supported instance
    whose last block fetch is older than the configured recheck interval.
    Pleroma/Mastodon blocks are persisted inside their network modules;
    friendica/misskey results are persisted inline below.
    """
    if args.domain is not None and args.domain != "":
        # Validate the explicitly requested domain before doing any work.
        if not validators.domain(args.domain):
            print(f"WARNING: domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        # blockdict collects "reject"-level blocks for an optional bot post.
        blockdict = list()
        blocker = tidyup.domain(blocker)

        # Skip entries that cannot (or must not) be fetched.
        if blocker == "":
            print("WARNING: blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            print(f"DEBUG: blocker='{blocker}',software='{software}' has empty nodeinfo_url")
            continue
        elif blacklist.is_blacklisted(blocker):
            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
            continue

        # Record the fetch attempt before contacting the instance.
        instances.set_last_blocked(blocker)

        if software == "pleroma":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "lemmy":
            print(f"INFO: blocker='{blocker}',software='{software}'")
            # NOTE: lemmy support is not implemented yet; only logged above.
            #lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            print(f"INFO: blocker='{blocker}',software='{software}'")

            # blocking maps block_level -> list of {"blocked", "reason"} dicts
            blocking = list()
            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)

            print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                block_level = tidyup.domain(block_level)
                if block_level == "":
                    print("WARNING: block_level is empty, blocker:", blocker)
                    continue

                for block in blocklist:
                    # assumes each block dict has exactly the keys
                    # ("blocked", "reason") in that order — TODO confirm
                    blocked, reason = block.values()
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None

                    if blocked == "":
                        print("WARNING: blocked is empty:", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        continue
                    elif blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without hash
                        row = instances.deobscure("*", blocked)

                        if row is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        # Use the deobscured record instead of the masked name.
                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]
                    elif blocked.count("?") > 0:
                        # Some obscure them with question marks, not sure if that's dependent on version or not
                        row = instances.deobscure("?", blocked)

                        if row is None:
                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]

                    if not validators.domain(blocked):
                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!")
                        continue
                    elif blocked.endswith(".arpa"):
                        # Reverse-DNS pseudo-domains are never real instances.
                        continue
                    elif not instances.is_registered(blocked):
                        # First time we see this blocked domain: register it.
                        try:
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                        except network.exceptions as exception:
                            print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'")
                            continue

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        # Block already known: refresh last-seen time and reason.
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

                    cookies.clear(blocked)

            fba.connection.commit()
        else:
            print("WARNING: Unknown software:", blocker, software)

        if instances.has_pending(blocker):
            instances.update_data(blocker)

        # Optionally announce new "reject" blocks via the bot account.
        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

        cookies.clear(blocker)
302
def fetch_observer(args: argparse.Namespace):
    """Crawl fediverse.observer's per-software table data and register
    every listed domain that is not yet known to the database."""
    software_list = [
        "akoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    print(f"INFO: Fetching {len(software_list)} different table data ...")
    for software in software_list:
        try:
            raw = fba.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            doc = bs4.BeautifulSoup(raw, features='html.parser')
        except network.exceptions as exception:
            print(f"WARNING: Cannot fetch software='{software}' from fediverse.observer: '{type(exception)}'")
            continue

        # Every instance link in the table carries the CSS class "url".
        items = doc.findAll("a", {"class": "url"})
        print(f"INFO: Checking {len(items)} items,software='{software}' ...")
        for item in items:
            domain = item.decode_contents()

            # Guard clauses: skip invalid, blacklisted or already known domains.
            if not validators.domain(domain.split("/")[0]):
                print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
                continue
            if blacklist.is_blacklisted(domain):
                continue
            if instances.is_registered(domain):
                continue

            print(f"INFO: Fetching instances for domain='{domain}',software='{software}'")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

            cookies.clear(domain)
379
def fetch_cs(args: argparse.Namespace):
    """Scrape chaos.social's published federation.md, record its silenced
    and blocked instances and fetch peers of any newly seen domain."""
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    # Maps block level -> list of {"domain", "reason"} rows found in the page.
    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text

    # Render the markdown to HTML so the tables can be located by heading id.
    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    domains["silenced"].extend(federation.find_domains(silenced))

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    domains["reject"].extend(federation.find_domains(blocked))

    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for block_level, rows in domains.items():
            for row in rows:
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if instances.is_registered(row["domain"]):
                    continue

                try:
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

                    cookies.clear(row["domain"])
                except network.exceptions as exception:
                    print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
                    instances.set_last_error(row["domain"], exception)

        fba.connection.commit()
450
def fetch_fba_rss(args: argparse.Namespace):
    """Parse an FBA-specific RSS feed (args.feed) and register every new
    domain found in the item links."""
    domains = list()

    print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        rss = atoma.parse_rss_bytes(response.content)

        for item in rss.items:
            # The instance domain is encoded as a query parameter of the link.
            domain = item.link.split("=")[1]

            # Guard clauses: skip blacklisted, duplicate or known domains.
            if blacklist.is_blacklisted(domain):
                continue
            if domain in domains:
                continue
            if instances.is_registered(domain):
                continue

            domains.append(domain)

    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                cookies.clear(domain)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
                instances.set_last_error(domain, exception)
498
def fetch_fbabot_atom(args: argparse.Namespace):
    """Parse the FBA bot account's ATOM feed and register all new domains
    linked from the feed entries."""
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        atom = atoma.parse_atom_bytes(response.content)

        for entry in atom.entries:
            # Entry content is HTML; scan all anchors for domain links.
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            for element in doc.findAll("a"):
                # A single href may list several comma-separated domains.
                for href in element["href"].split(","):
                    domain = tidyup.domain(href)

                    if blacklist.is_blacklisted(domain):
                        continue
                    if domain in domains:
                        continue
                    if instances.is_registered(domain):
                        continue

                    domains.append(domain)

    if len(domains) > 0:
        locking.acquire()

        print(f"INFO: Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                print(f"INFO: Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                cookies.clear(domain)
            except network.exceptions as exception:
                print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
                instances.set_last_error(domain, exception)
554
def fetch_instances(args: argparse.Namespace) -> int:
    """Fetch peer instances, starting from args.domain.

    Unless args.single is set, afterwards also refreshes every known
    instance whose data is older than the configured recheck interval.
    Returns 0 on success, 100 when the initial fetch fails.
    """
    locking.acquire()

    # Initial fetch of the explicitly requested domain.
    try:
        print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

        cookies.clear(args.domain)
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
        instances.set_last_error(args.domain, exception)

        return 100

    if args.single:
        return 0

    # Loop over stale instances of the supported software types.
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        if blacklist.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        try:
            print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
            federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name, nodeinfo_url)

            cookies.clear(domain)
        except network.exceptions as exception:
            print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{domain}'")
            instances.set_last_error(domain, exception)

    return 0
601
def fetch_oliphant(args: argparse.Namespace):
    """Fetch oliphant's blocklist CSV files from codeberg.org and process
    every domain column found in them.

    When args.domain is set, only the list of the matching blocker is
    fetched; each domain is handled at most once per invocation.
    """
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # Blocklists to fetch: blocker domain plus CSV path relative to base_url.
    # Note: mastodon.social appears twice on purpose (two separate lists).
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            continue
        elif args.domain in domains:
            continue

        # Fetch this URL
        print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
        response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # BUGFIX: response.content is bytes; the previous comparison against
        # the str "" was always True and never filtered empty responses.
        if response.ok and response.content != b"":
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            for row in reader:
                # Accept both "#domain" and "domain" as the column header.
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    continue

                # Mark the domain as handled before processing it.
                domains.append(domain)

                # Return value intentionally ignored (was an unused local).
                fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
689
def fetch_txt(args: argparse.Namespace):
    """Fetch static plain-text blocklists and process each listed domain."""
    locking.acquire()

    # Static URLs
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    print(f"INFO: Checking {len(urls)} text file(s) ...")
    for url in urls:
        response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.text != "":
            # One domain per line.
            domains = response.text.split("\n")

            print(f"INFO: Processing {len(domains)} domains ...")
            for domain in domains:
                if domain == "":
                    # Blank lines (e.g. from a trailing newline) carry no domain.
                    continue

                # The previous code stored the return value only to feed a
                # dead 'continue' at the end of the loop body; it is now
                # intentionally ignored.
                fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)