# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
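    """
    Checks whether args.domain may be added as a new instance.

    Returns 0 if the domain is unknown (and therefore addable), 100 if it is
    not a valid domain name, 101 if it is blacklisted and 102 if it is
    already registered.
    """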
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def fetch_bkali(args: argparse.Namespace) -> int:
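    """
    Fetches a domain list from the GraphQL API at gql.api.bka.li and adds
    all wanted, not yet registered domains as new instances.

    Returns 0 on success, 100/101 on API-level errors and 102 on network
    errors.
    """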
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched['error_message'])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched['json']['error']['message'])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows['data']()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry['domain'])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("domain='%s' is already registered - SKIPPED!", entry['domain'])
                continue

            logger.debug("Adding domain='%s' ...", entry['domain'])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql, exception[%s]='%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
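    """
    Fetches block lists from registered instances, optionally restricted to
    args.domain or args.software, and records new or updated blocks.
    Attempts to deobfuscate blocked domains that are masked with '*' or '?'.

    Returns 0 on success, 100-102 when args.domain fails validation.
    """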
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blockdict = list()
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block['blocked'], block['block_level'], block['reason'])

            if block['block_level'] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", blocker, block['blocked'])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block['blocked'], block['reason'])
            block['blocked'] = tidyup.domain(block['blocked'])
            block['reason']  = tidyup.reason(block['reason']) if block['reason'] is not None and block['reason'] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block['blocked'], block['reason'])

            if block['blocked'] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block['blocked'].count("*") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some friendica servers also obscure domains without a hash
                row = instances.deobfuscate("*", block['blocked'])

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block['blocked'], blocker, software)
                    continue

                block['blocked'] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]
            elif block['blocked'].count("?") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfuscation(blocker, True)

                # Some servers obscure domains with question marks instead; it is unclear whether that is version-dependent
                row = instances.deobfuscate("?", block['blocked'])

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block['blocked'], blocker, software)
                    continue

                block['blocked'] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]

            logger.debug("Looking up instance by domain, blocked='%s'", block['blocked'])
            if not utils.is_domain_wanted(block['blocked']):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block['blocked'])
                continue
            elif not instances.is_registered(block['blocked']):
                logger.debug("blocked='%s' wasn't found, adding: blocker='%s'", block['blocked'], blocker)
                try:
                    instances.add(block['blocked'], blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                except network.exceptions as exception:
                    logger.warning("Exception during adding blocked='%s',blocker='%s': '%s'", block['blocked'], blocker, type(exception))
                    continue

            if not blocks.is_instance_blocked(blocker, block['blocked'], block['block_level']):
                blocks.add_instance(blocker, block['blocked'], block['reason'], block['block_level'])

                if block['block_level'] == "reject":
                    blockdict.append({
                        "blocked": block['blocked'],
                        "reason" : block['reason'],
                    })
            else:
                logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block['blocked'])
                blocks.update_last_seen(blocker, block['blocked'], block['block_level'])
                blocks.update_reason(block['reason'], blocker, block['blocked'], block['block_level'])

            logger.debug("Invoking cookies.clear(%s) ...", block['blocked'])
            cookies.clear(block['blocked'])

        if instances.has_pending(blocker):
            logger.debug("Invoking instances.update_data(%s) ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
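    """
    Fetches per-software instance tables from fediverse.observer and
    registers all wanted, not yet known domains.

    Returns 0 on success.
    """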
    logger.debug("args[]='%s' - CALLED!", type(args))
    types = [
        "akoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    logger.info("Fetching table data for %d software types ...", len(types))
    for software in types:
        doc = None

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features='html.parser')
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s',software='%s'", domain, software)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

            logger.debug("Invoking cookies.clear(%s) ...", domain)
            cookies.clear(domain)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
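    """
    Fetches the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks by todon.eu.

    Returns 0 on success.
    """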
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'todon.eu', None, inspect.currentframe().f_code.co_name)

                    logger.debug("Invoking cookies.clear(%s) ...", blocked)
                    cookies.clear(blocked)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            blocks.add_instance("todon.eu", blocked, None, block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
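    """
    Fetches chaos.social's federation.md, renders the Markdown and records
    the silenced and blocked instance tables as blocks by chaos.social.

    Returns 0 on success.
    """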
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

                        logger.debug("Invoking cookies.clear(%s) ...", row["domain"])
                        cookies.clear(row["domain"])
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    logger.debug("domain='%s',block_level='%s' not yet recorded as blocked by chaos.social, adding ...", row["domain"], block_level)
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
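    """
    Fetches the FBA-specific RSS feed given by args.feed and registers all
    new, non-blacklisted domains found in the item links.

    Returns 0 on success.
    """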
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
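    """
    Fetches the ATOM feed of the FBA bot account on ryona.agency and
    registers all wanted, not yet known domains linked from its entries.

    Returns 0 on success.
    """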
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains(%d)='%s'", len(domains), domains)
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
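    """
    Fetches instances from args.domain and, unless args.single is set, also
    re-fetches all known instances whose last fetch is older than the
    configured 'recheck_instance' interval.

    Returns 0 on success, 100 when the initial fetch fails.
    """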
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

        logger.debug("Invoking cookies.clear(%s) ...", args.domain)
        cookies.clear(args.domain)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row[0])
        if blacklist.is_blacklisted(row[0]):
            logger.warning("Domain is blacklisted: row[0]='%s'", row[0])
            continue

        try:
            logger.info("Fetching instances for instance domain='%s',software='%s',origin='%s',nodeinfo_url='%s'", row[0], row[2], row[1], row[3])
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])

            logger.debug("Invoking cookies.clear(%s) ...", row[0])
            cookies.clear(row[0])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
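    """
    Downloads CSV block lists from oliphant's blocklists repository on
    codeberg.org, optionally restricted to args.domain, and processes every
    wanted domain found in them.

    Returns 0 on success.
    """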
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block['blocker'], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block['csv_url'], block['blocker'])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and len(response.content) > 0:
            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            logger.debug("reader[]='%s'", type(reader))
            for row in reader:
                logger.debug("row[%s]='%s'", type(row), row)
                domain = None
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug("row='%s' does not contain a domain column - SKIPPED!", row)
                    continue

                logger.debug("domain='%s'", domain)
                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("Marking domain='%s' as handled", domain)
                domains.append(domain)

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
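    """
    Fetches static text block lists (currently seirdy.one's bsl.txt) and
    processes every wanted domain found in them.

    Returns 0 on success.
    """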
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    logger.info("Checking %d text file(s) ...", len(urls))
    for url in urls:
        logger.debug("Fetching url='%s' ...", url)
        response = utils.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("domain='%s'", domain)
                processed = utils.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
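    """
    Fetches the participant list from fedipact.online and registers all
    wanted, not yet known domains.

    Returns 0 on success.
    """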
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0