fba/commands.py
# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import argparse
import csv
import inspect
import json
import logging
import time

import atoma
import bs4
import markdown
import reqto
import validators

from fba import fba

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def check_instance(args: argparse.Namespace) -> int:
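    """
    Checks whether args.domain could be registered: returns 0 when the
    domain is not yet known, 100 when it is not a valid domain name,
    101 when it is blacklisted and 102 when it is already registered.
    """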
    logger.debug(f"args.domain='{args.domain}' - CALLED!")
    status = 0
    if not validators.domain(args.domain):
        logger.warning(f"args.domain='{args.domain}' is not valid")
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning(f"args.domain='{args.domain}' is blacklisted")
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning(f"args.domain='{args.domain}' is already registered")
        status = 102
    else:
        logger.info(f"args.domain='{args.domain}' is not known")

    logger.debug(f"status={status} - EXIT!")
    return status

def fetch_bkali(args: argparse.Namespace) -> int:
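    """
    Fetches a domain list from the GraphQL API at gql.api.bka.li and fetches
    instance data for every new, valid, non-blacklisted domain. Returns 0 on
    success, 100/101 on API errors and 102 on network exceptions.
    """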
    logger.debug(f"args[]='{type(args)}' - CALLED!")
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug(f"fetched[]='{type(fetched)}'")
        if "error_message" in fetched:
            logger.warning(f"post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning(f"post_json_api() returned error: {fetched['json']['error']['message']}")
            return 101

        rows = fetched["json"]

        logger.debug(f"rows({len(rows)})[]='{type(rows)}'")
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug(f"entry['{type(entry)}']='{entry}'")
            if "domain" not in entry:
                logger.warning(f"entry()={len(entry)} does not contain 'domain' - SKIPPED!")
                continue
            elif not validators.domain(entry["domain"]):
                logger.warning(f"domain='{entry['domain']}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(entry["domain"]):
                logger.debug(f"domain='{entry['domain']}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug(f"domain='{entry['domain']}' is already registered - SKIPPED!")
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug(f"domain='{entry['domain']}' has been recently fetched - SKIPPED!")
                continue

            logger.debug(f"Adding domain='{entry['domain']}' ...")
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.error(f"Cannot fetch GraphQL API, exception[{type(exception)}]:'{str(exception)}' - EXIT!")
        return 102

    logger.debug(f"domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        logger.info(f"Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                logger.info(f"Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug(f"Invoking cookies.clear({domain}) ...")
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace):
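    """
    Fetches block lists. When args.domain is set, only that single domain is
    re-checked; when args.software is set, all instances running that
    software are re-checked; otherwise all supported instances whose
    last_blocked timestamp is older than the 'recheck_block' interval are
    processed.
    """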
    logger.debug(f"args[]='{type(args)}' - CALLED!")
    if args.domain is not None and args.domain != "":
        logger.debug(f"args.domain='{args.domain}' - checking ...")
        if not validators.domain(args.domain):
            logger.warning(f"domain='{args.domain}' is not valid.")
            return
        elif blacklist.is_blacklisted(args.domain):
            logger.warning(f"domain='{args.domain}' is blacklisted, won't check it!")
            return
        elif not instances.is_registered(args.domain):
            logger.warning(f"domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
            return

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug(f"Querying database for single args.domain='{args.domain}' ...")
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug(f"Querying database for args.software='{args.software}' ...")
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        fba.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = fba.cursor.fetchall()
    logger.info(f"Checking {len(rows)} entries ...")
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug(f"blocker='{blocker}',software='{software}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - BEFORE!")
        blockdict = list()
        blocker = tidyup.domain(blocker)
        logger.debug(f"blocker='{blocker}',software='{software}' - AFTER!")

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug(f"blocker='{blocker}',software='{software}' has empty nodeinfo_url")
            continue
        elif blacklist.is_blacklisted(blocker):
            logger.warning(f"blocker='{blocker}' is blacklisted now!")
            continue

        logger.debug(f"blocker='{blocker}'")
        instances.set_last_blocked(blocker)

        if software == "pleroma":
            logger.info(f"blocker='{blocker}',software='{software}'")
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            logger.info(f"blocker='{blocker}',software='{software}'")
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "lemmy":
            logger.info(f"blocker='{blocker}',software='{software}'")
            lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            logger.info(f"blocker='{blocker}',software='{software}'")

            blocking = list()
            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)

            logger.info(f"Checking {len(blocking)} entries from blocker='{blocker}',software='{software}' ...")
            for block_level, blocklist in blocking.items():
                logger.debug(f"blocker='{blocker}',block_level='{block_level}',blocklist()={len(blocklist)}")
                block_level = tidyup.domain(block_level)
                logger.debug(f"block_level='{block_level}' - AFTER!")
                if block_level == "":
                    logger.warning(f"block_level is empty, blocker='{blocker}'")
                    continue

                logger.debug(f"Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
                for block in blocklist:
                    blocked, reason = block.values()
                    logger.debug(f"blocked='{blocked}',reason='{reason}' - BEFORE!")
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                    logger.debug(f"blocked='{blocked}',reason='{reason}' - AFTER!")

                    if blocked == "":
                        logger.warning(f"blocked is empty, blocker='{blocker}'")
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!")
                        continue
                    elif blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without hash
                        row = instances.deobscure("*", blocked)

                        logger.debug(f"row[]='{type(row)}'")
                        if row is None:
                            logger.warning(f"Cannot deobfuscate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]
                    elif blocked.count("?") > 0:
                        # Some obscure them with question marks, not sure if that's dependent on version or not
                        row = instances.deobscure("?", blocked)

                        logger.debug(f"row[]='{type(row)}'")
                        if row is None:
                            logger.warning(f"Cannot deobfuscate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]

                    logger.debug(f"Looking up instance by domain: blocked='{blocked}'")
                    if not validators.domain(blocked):
                        logger.warning(f"blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!")
                        continue
                    elif blocked.endswith(".arpa"):
                        logger.debug(f"blocked='{blocked}' is a reverse IP domain - SKIPPED!")
                        continue
                    elif blocked.endswith(".tld"):
                        logger.debug(f"blocked='{blocked}' is a fake domain - SKIPPED!")
                        continue
                    elif not instances.is_registered(blocked):
                        logger.debug(f"Hash wasn't found, adding: blocked='{blocked}',blocker='{blocker}'")
                        try:
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                        except network.exceptions as exception:
                            logger.warning(f"Exception '{type(exception)}' during adding blocked='{blocked}',blocker='{blocker}'")
                            continue

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        logger.debug(f"Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

                    logger.debug(f"Invoking cookies.clear({blocked}) ...")
                    cookies.clear(blocked)

            logger.debug("Committing changes ...")
            fba.connection.commit()
        else:
            logger.warning(f"Unknown software: blocker='{blocker}',software='{software}'")

        if instances.has_pending(blocker):
            logger.debug(f"Invoking instances.update_data({blocker}) ...")
            instances.update_data(blocker)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

        logger.debug(f"Invoking cookies.clear({blocker}) ...")
        cookies.clear(blocker)

    logger.debug("EXIT!")

def fetch_observer(args: argparse.Namespace):
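    """
    Fetches per-software table data from fediverse.observer and fetches
    instance data for every new, valid, non-blacklisted domain found there.
    """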
    logger.debug(f"args[]='{type(args)}' - CALLED!")
    types = [
        "akoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    logger.info(f"Fetching table data for {len(types)} software types ...")
    for software in types:
        doc = None

        try:
            logger.debug(f"Fetching table data for software='{software}' ...")
            raw = fba.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
            logger.debug(f"raw[{type(raw)}]()={len(raw)}")

            doc = bs4.BeautifulSoup(raw, features='html.parser')
            logger.debug(f"doc[]='{type(doc)}'")
        except network.exceptions as exception:
            logger.warning(f"Cannot fetch software='{software}' from fediverse.observer: '{type(exception)}'")
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info(f"Checking {len(items)} items,software='{software}' ...")
        for item in items:
            logger.debug(f"item[]='{type(item)}'")
            domain = item.decode_contents()

            logger.debug(f"domain='{domain}'")
            if not validators.domain(domain.split("/")[0]):
                logger.warning(f"domain='{domain}' is not a valid domain - SKIPPED!")
                continue
            elif blacklist.is_blacklisted(domain):
                logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                logger.debug(f"domain='{domain}' is already registered - SKIPPED!")
                continue

            logger.info(f"Fetching instances for domain='{domain}',software='{software}'")
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

            logger.debug(f"Invoking cookies.clear({domain}) ...")
            cookies.clear(domain)

    logger.debug("EXIT!")

def fetch_cs(args: argparse.Namespace):
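    """
    Fetches chaos.social's federation.md, parses the "silenced" and
    "blocked" instance tables from the rendered Markdown and records the
    corresponding block entries plus any newly found instances.
    """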
    logger.debug(f"args[]='{type(args)}' - CALLED!")
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug(f"raw()={len(raw)}[]='{type(raw)}'")

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')

    logger.debug(f"doc()={len(doc)}[]='{type(doc)}'")
    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug(f"silenced[]='{type(silenced)}'")
    domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug(f"blocked[]='{type(blocked)}'")
    domains["reject"] = domains["reject"] + federation.find_domains(blocked)

    logger.debug(f"silenced()={len(domains['silenced'])},reject()={len(domains['reject'])}")
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        logger.info(f"Adding {len(domains['silenced'])} silenced and {len(domains['reject'])} rejected domains ...")
        for block_level in domains:
            logger.debug(f"block_level='{block_level}'")

            for row in domains[block_level]:
                logger.debug(f"row='{row}'")
                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    logger.debug(f"domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

                if not instances.is_registered(row["domain"]):
                    try:
                        logger.info(f"Fetching instances from domain='{row['domain']}' ...")
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

                        logger.debug(f"Invoking cookies.clear({row['domain']}) ...")
                        cookies.clear(row["domain"])
                    except network.exceptions as exception:
                        logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
                        instances.set_last_error(row["domain"], exception)

        logger.debug("Committing changes ...")
        fba.connection.commit()

    logger.debug("EXIT!")

def fetch_fba_rss(args: argparse.Namespace):
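    """
    Fetches an FBA-specific RSS feed from args.feed and fetches instance
    data for every new, non-blacklisted domain found in the feed items.
    """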
    logger.debug(f"args[]='{type(args)}' - CALLED!")
    domains = list()

    logger.info(f"Fetching FBA-specific RSS feed args.feed='{args.feed}' ...")
    response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug(f"response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug(f"Parsing RSS feed ({len(response.text)} Bytes) ...")
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug(f"rss[]='{type(rss)}'")
        for item in rss.items:
            logger.debug(f"item={item}")
            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
                continue
            elif domain in domains:
                logger.debug(f"domain='{domain}' is already added - SKIPPED!")
                continue
            elif instances.is_registered(domain):
                logger.debug(f"domain='{domain}' is already registered - SKIPPED!")
                continue

            logger.debug(f"Adding domain='{domain}'")
            domains.append(domain)

    logger.debug(f"domains()={len(domains)}")
    if len(domains) > 0:
        locking.acquire()

        logger.info(f"Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                logger.info(f"Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug(f"Invoking cookies.clear({domain}) ...")
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
                instances.set_last_error(domain, exception)

    logger.debug("EXIT!")

def fetch_fbabot_atom(args: argparse.Namespace):
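    """
    Fetches the Atom feed of the FBA bot account on ryona.agency and fetches
    instance data for every new domain linked from its entries.
    """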
    logger.debug(f"args[]='{type(args)}' - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info(f"Fetching ATOM feed='{feed}' from FBA bot account ...")
    response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug(f"response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug(f"Parsing ATOM feed ({len(response.text)} Bytes) ...")
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug(f"atom[]='{type(atom)}'")
        for entry in atom.entries:
            logger.debug(f"entry[]='{type(entry)}'")
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug(f"doc[]='{type(doc)}'")
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug(f"href[{type(href)}]={href}")
                    domain = tidyup.domain(href)

                    logger.debug(f"domain='{domain}'")
                    if blacklist.is_blacklisted(domain):
                        logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
                        continue
                    elif domain in domains:
                        logger.debug(f"domain='{domain}' is already added - SKIPPED!")
                        continue
                    elif instances.is_registered(domain):
                        logger.debug(f"domain='{domain}' is already registered - SKIPPED!")
                        continue

                    logger.debug(f"Adding domain='{domain}',domains()={len(domains)}")
                    domains.append(domain)

    logger.debug(f"domains({len(domains)})={domains}")
    if len(domains) > 0:
        locking.acquire()

        logger.info(f"Adding {len(domains)} new instances ...")
        for domain in domains:
            try:
                logger.info(f"Fetching instances from domain='{domain}' ...")
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug(f"Invoking cookies.clear({domain}) ...")
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
                instances.set_last_error(domain, exception)

    logger.debug("EXIT!")

def fetch_instances(args: argparse.Namespace) -> int:
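    """
    Fetches instances from args.domain first; unless args.single is set, it
    then loops over all known instances whose last_instance_fetch is older
    than the 'recheck_instance' interval. Returns 100 when the initial fetch
    fails, otherwise 0.
    """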
    logger.debug(f"args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Initial fetch
    try:
        logger.info(f"Fetching instances from args.domain='{args.domain}' ...")
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

        logger.debug(f"Invoking cookies.clear({args.domain}) ...")
        cookies.clear(args.domain)
    except network.exceptions as exception:
        logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
        instances.set_last_error(args.domain, exception)

        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = fba.cursor.fetchall()
    logger.info(f"Checking {len(rows)} entries ...")
    for row in rows:
        logger.debug(f"domain='{row[0]}'")
        if blacklist.is_blacklisted(row[0]):
            logger.warning(f"domain='{row[0]}' is blacklisted - SKIPPED!")
            continue

        try:
            logger.info(f"Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])

            logger.debug(f"Invoking cookies.clear({row[0]}) ...")
            cookies.clear(row[0])
        except network.exceptions as exception:
            logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'")
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace):
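    """
    Fetches the CSV block lists published in oliphant's blocklists
    repository on Codeberg and processes every domain column found in them.
    When args.domain is set, only the matching blocker's list is fetched.
    """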
    logger.debug(f"args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug(f"Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'")
            continue
        elif args.domain in domains:
            logger.debug(f"args.domain='{args.domain}' already handled - SKIPPED!")
            continue

        # Fetch this URL
        logger.info(f"Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
        response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug(f"response[]='{type(response)}'")
        if response.ok and response.content != b"":
            logger.debug(f"Fetched {len(response.content)} Bytes, parsing CSV ...")
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            logger.debug(f"reader[]='{type(reader)}'")
            for row in reader:
                domain = None
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug(f"row='{row}' does not contain a domain column - SKIPPED!")
                    continue

                if not validators.domain(domain):
                    logger.warning(f"domain='{domain}' is not a valid domain name - SKIPPED!")
                    continue
                elif domain.endswith(".arpa"):
                    logger.debug(f"domain='{domain}' is a reverse IP domain - SKIPPED!")
                    continue
                elif domain.endswith(".tld"):
                    logger.debug(f"domain='{domain}' is a fake domain - SKIPPED!")
                    continue
                elif blacklist.is_blacklisted(domain):
                    logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
                    continue

                logger.debug(f"Marking domain='{domain}' as handled")
                domains.append(domain)

                logger.debug(f"Processing domain='{domain}' ...")
                processed = fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug(f"processed='{processed}'")

    logger.debug("EXIT!")

def fetch_txt(args: argparse.Namespace):
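    """
    Fetches static, plain-text block lists (currently seirdy.one's bsl.txt)
    and processes every valid, non-blacklisted domain listed in them.
    """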
    logger.debug(f"args[]='{type(args)}' - CALLED!")
    locking.acquire()

    # Static URLs
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    logger.info(f"Checking {len(urls)} text file(s) ...")
    for url in urls:
        logger.debug(f"Fetching url='{url}' ...")
        response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug(f"response[]='{type(response)}'")
        if response.ok and response.text != "":
            logger.debug(f"Returned {len(response.text.strip())} Bytes for processing")
            domains = response.text.split("\n")

            logger.info(f"Processing {len(domains)} domains ...")
            for domain in domains:
                logger.debug(f"domain='{domain}'")
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not validators.domain(domain):
                    logger.warning(f"domain='{domain}' is not a valid domain name - SKIPPED!")
                    continue
                elif domain.endswith(".arpa"):
                    logger.debug(f"domain='{domain}' is a reverse IP domain - SKIPPED!")
                    continue
                elif domain.endswith(".tld"):
                    logger.debug(f"domain='{domain}' is a fake domain - SKIPPED!")
                    continue
                elif blacklist.is_blacklisted(domain):
                    logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
                    continue

                logger.debug(f"Processing domain='{domain}' ...")
                processed = fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)

                logger.debug(f"processed='{processed}'")
                if not processed:
                    logger.debug(f"domain='{domain}' was not generically processed - SKIPPED!")
                    continue

    logger.debug("EXIT!")