# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

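# Checks a single domain: it must be syntactically valid, not blacklisted and
# not yet registered. Returns 0 when the domain is still unknown, 100 when it
# is invalid, 101 when blacklisted and 102 when already registered.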
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

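# Fetches a domain list from the GraphQL API at gql.api.bka.li and runs
# federation.fetch_instances() on every wanted, not yet registered domain.
# Returns 0 on success, 100/101 on API errors and 102 on network errors.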
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("domain='%s' is already registered - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.error("Cannot fetch graphql, exception[%s]: '%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

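# Fetches block lists from all registered instances with supported software,
# or from a single domain/software when --domain/--software is given. New
# blocks are inserted, already known ones get their "last seen" timestamp and
# reason updated. Obfuscated entries (containing "*" or "?") are resolved
# through instances.deobfucate() where possible.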
def fetch_blocks(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blockdict = list()
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfucation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block['blocked'], block['block_level'], block['reason'])

            if block['block_level'] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", blocker, block['blocked'])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block['blocked'], block['reason'])
            block['blocked'] = tidyup.domain(block['blocked'])
            block['reason']  = tidyup.reason(block['reason']) if block['reason'] is not None and block['reason'] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block['blocked'], block['reason'])

            if block['blocked'] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block['blocked'].count("*") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfucation(blocker, True)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfucate("*", block['blocked'])

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block['blocked'], blocker, software)
                    continue

                block['blocked'] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]
            elif block['blocked'].count("?") > 0:
                logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
                instances.set_has_obfucation(blocker, True)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfucate("?", block['blocked'])

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block['blocked'], blocker, software)
                    continue

                block['blocked'] = row[0]
                origin           = row[1]
                nodeinfo_url     = row[2]

            logger.debug("Looking up instance by domain, blocked='%s'", block['blocked'])
            if not utils.is_domain_wanted(block['blocked']):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block['blocked'])
                continue
            elif not instances.is_registered(block['blocked']):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block['blocked'], blocker)
                try:
                    instances.add(block['blocked'], blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                except network.exceptions as exception:
                    logger.warning("Exception during adding blocked='%s',blocker='%s': '%s'", block['blocked'], blocker, type(exception))
                    continue

            if not blocks.is_instance_blocked(blocker, block['blocked'], block['block_level']):
                blocks.add_instance(blocker, block['blocked'], block['reason'], block['block_level'])

                if block['block_level'] == "reject":
                    blockdict.append({
                        "blocked": block['blocked'],
                        "reason" : block['reason'],
                    })
            else:
                logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block['blocked'])
                blocks.update_last_seen(blocker, block['blocked'], block['block_level'])
                blocks.update_reason(block['reason'], blocker, block['blocked'], block['block_level'])

            logger.debug("Invoking cookies.clear(%s) ...", block['blocked'])
            cookies.clear(block['blocked'])

        if instances.has_pending(blocker):
            logger.debug("Invoking instances.update_data(%s) ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

    logger.debug("EXIT!")

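# Fetches per-software instance tables from fediverse.observer and registers
# every wanted, not yet known domain found in them.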
def fetch_observer(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))
    types = [
        "akoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    logger.info("Fetching table data for %d software types ...", len(types))
    for software in types:
        doc = None

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features='html.parser')
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s',software='%s'", domain, software)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

            logger.debug("Invoking cookies.clear(%s) ...", domain)
            cookies.clear(domain)

    logger.debug("EXIT!")

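# Fetches the silenced/limited and suspended server lists from wiki.todon.eu
# and records them as blocks by todon.eu, registering unknown instances along
# the way.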
def fetch_todon_wiki(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'todon.eu', None, inspect.currentframe().f_code.co_name)

                    logger.debug("Invoking cookies.clear(%s) ...", blocked)
                    cookies.clear(blocked)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            blocks.add_instance("todon.eu", blocked, None, block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("EXIT!")

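# Fetches chaos.social's federation.md, renders the Markdown and parses the
# "silenced" and "blocked" instance tables, recording the entries as blocks
# by chaos.social.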
def fetch_cs(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row='%s'", row)
                if not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

                        logger.debug("Invoking cookies.clear(%s) ...", row["domain"])
                        cookies.clear(row["domain"])
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    logger.debug("domain='%s',block_level='%s' blocked by chaos.social, adding ...", row["domain"], block_level)
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("EXIT!")

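# Fetches an FBA-specific RSS feed (URL given as --feed) and registers every
# non-blacklisted, not yet known domain found in the item links.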
def fetch_fba_rss(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("EXIT!")

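# Fetches the ATOM feed of the FBA bot account on ryona.agency and registers
# every non-blacklisted, not yet known domain linked from its entries.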
def fetch_fbabot_atom(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if blacklist.is_blacklisted(domain):
                        logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains(%d)=%s", len(domains), domains)
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("EXIT!")

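# Fetches instances from args.domain first and then, unless --single is set,
# re-crawls all registered instances with supported software whose last fetch
# is older than the configured "recheck_instance" interval. Returns 100 when
# the initial fetch fails, otherwise 0.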
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

        logger.debug("Invoking cookies.clear(%s) ...", args.domain)
        cookies.clear(args.domain)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row[0])
        if blacklist.is_blacklisted(row[0]):
            logger.warning("domain is blacklisted: row[0]='%s'", row[0])
            continue

        try:
            logger.info("Fetching instances for domain='%s',software='%s',origin='%s',nodeinfo_url='%s'", row[0], row[2], row[1], row[3])
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])

            logger.debug("Invoking cookies.clear(%s) ...", row[0])
            cookies.clear(row[0])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0

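# Fetches CSV block lists from the oliphant/blocklists repository on Codeberg
# and processes every wanted domain found in their "domain"/"#domain" column.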
def fetch_oliphant(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block['blocker'], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block['csv_url'], block['blocker'])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.content != b"":
            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            logger.debug("reader[]='%s'", type(reader))
            for row in reader:
                domain = None
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug("row='%s' does not contain domain column", row)
                    continue

                logger.debug("domain='%s'", domain)
                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("Marking domain='%s' as handled", domain)
                domains.append(domain)

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)

    logger.debug("EXIT!")

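# Fetches plain-text block lists (currently seirdy.one's bsl.txt) and
# processes every non-empty, wanted domain in them, one domain per line.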
def fetch_txt(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    logger.info("Checking %d text file(s) ...", len(urls))
    for url in urls:
        logger.debug("Fetching url='%s' ...", url)
        response = utils.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("domain='%s'", domain)
                processed = utils.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("EXIT!")

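# Fetches the instance list from fedipact.online and registers every wanted,
# not yet known domain found in its <li> elements.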
def fetch_fedipact(args: argparse.Namespace):
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("EXIT!")