# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def check_instance(args: argparse.Namespace) -> int:
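    """
    Checks a single domain (args.domain) before it is added: returns 100 if it
    is not a valid domain, 101 if it is blacklisted, 102 if it is already
    registered and 0 if it is unknown and may be fetched.
    """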
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def fetch_bkali(args: argparse.Namespace) -> int:
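    """
    Fetches a domain list from the gql.api.bka.li GraphQL API and registers
    every new, wanted domain. Returns 0 on success, 100/101 on API errors and
    102 when the request itself fails.
    """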
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched['error_message'])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched['json']['error']['message'])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry['domain'])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("domain='%s' is already registered - SKIPPED!", entry['domain'])
                continue

            logger.debug("Adding domain='%s' ...", entry['domain'])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.error("Cannot fetch GraphQL, exception[%s]: '%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace):
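    """
    Fetches block lists from registered instances. With args.domain or
    args.software set, only matching instances are re-checked; otherwise all
    supported instances whose last check is older than the configured
    'recheck_block' interval are processed.
    """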
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("BEFORE blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blockdict = list()
        blocker = tidyup.domain(blocker)
        logger.debug("AFTER blocker='%s',software='%s'", blocker, software)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)

        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)

            # fetch_blocks() returns a dict of block_level -> list of blocks
            blocking = dict()
            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)

            logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
            for block_level, blocklist in blocking.items():
                logger.debug("blocker='%s',block_level='%s',blocklist()=%d", blocker, block_level, len(blocklist))
                block_level = tidyup.domain(block_level)
                logger.debug("AFTER-block_level='%s'", block_level)
                if block_level == "":
                    logger.warning("block_level is empty, blocker='%s'", blocker)
                    continue

                logger.debug("Checking %d entries from blocker='%s',software='%s',block_level='%s' ...", len(blocklist), blocker, software, block_level)
                for block in blocklist:
                    blocked, reason = block.values()
                    logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason)
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                    logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                    if blocked == "":
                        logger.warning("blocked is empty, blocker='%s'", blocker)
                        continue
                    elif blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without hash
                        row = instances.deobscure("*", blocked)

                        logger.debug("row[]='%s'", type(row))
                        if row is None:
                            logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", blocked, blocker, software)
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]
                    elif blocked.count("?") > 0:
                        # Some servers obscure domains with question marks; unclear whether this depends on the version
                        row = instances.deobscure("?", blocked)

                        logger.debug("row[]='%s'", type(row))
                        if row is None:
                            logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", blocked, blocker, software)
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]

                    logger.debug("Looking up instance by domain, blocked='%s'", blocked)
                    if not utils.is_domain_wanted(blocked):
                        logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue
                    elif not instances.is_registered(blocked):
                        logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", blocked, blocker)
                        try:
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                        except network.exceptions as exception:
                            logger.warning("Exception '%s' during adding blocked='%s',blocker='%s'", type(exception), blocked, blocker)
                            continue

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, blocked)
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

                    logger.debug("Invoking cookies.clear(%s) ...", blocked)
                    cookies.clear(blocked)

            logger.debug("Invoking commit() ...")
            database.connection.commit()
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        if instances.has_pending(blocker):
            logger.debug("Invoking instances.update_data(%s) ...", blocker)
            instances.update_data(blocker)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

    logger.debug("EXIT!")

def fetch_observer(args: argparse.Namespace):
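    """
    Crawls fediverse.observer's per-software table data and registers every
    new, wanted domain found there.
    """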
    logger.debug("args[]='%s' - CALLED!", type(args))
    types = [
        "akkoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        doc = None

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features='html.parser')
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s',software='%s'", domain, software)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

            logger.debug("Invoking cookies.clear(%s) ...", domain)
            cookies.clear(domain)

    logger.debug("EXIT!")

def fetch_todon_wiki(args: argparse.Namespace):
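    """
    Fetches todon.eu's domain block wiki page and records its silenced/limited
    and suspended entries as blocks by 'todon.eu'.
    """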
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'todon.eu', None, inspect.currentframe().f_code.co_name)

                    logger.debug("Invoking cookies.clear(%s) ...", blocked)
                    cookies.clear(blocked)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            blocks.add_instance("todon.eu", blocked, None, block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("EXIT!")

def fetch_cs(args: argparse.Namespace):
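    """
    Fetches chaos.social's federation.md, parses the silenced and blocked
    tables and records the entries as blocks by 'chaos.social'.
    """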
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row='%s'", row)
                if not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)

                        logger.debug("Invoking cookies.clear(%s) ...", row["domain"])
                        cookies.clear(row["domain"])
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
                    logger.debug("domain='%s',block_level='%s' blocked by chaos.social, adding ...", row["domain"], block_level)
                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("EXIT!")

def fetch_fba_rss(args: argparse.Namespace):
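    """
    Fetches an FBA-specific RSS feed (args.feed) and registers every new,
    non-blacklisted domain linked from its items.
    """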
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("EXIT!")

def fetch_fbabot_atom(args: argparse.Namespace):
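    """
    Fetches the FBA bot account's ATOM feed from ryona.agency and registers
    every new, non-blacklisted domain mentioned in its entries.
    """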
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if blacklist.is_blacklisted(domain):
                        logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains(%d)='%s'", len(domains), domains)
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("EXIT!")

def fetch_instances(args: argparse.Namespace) -> int:
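    """
    Fetches peer instances starting from args.domain. Unless args.single is
    set, afterwards re-crawls all known instances whose last fetch is older
    than the configured 'recheck_instance' interval. Returns 0 on success,
    100 when the initial fetch fails.
    """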
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

        logger.debug("Invoking cookies.clear(%s) ...", args.domain)
        cookies.clear(args.domain)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row[0])
        if blacklist.is_blacklisted(row[0]):
            logger.warning("domain is blacklisted: row[0]='%s'", row[0])
            continue

        try:
            logger.info("Fetching instances for instance domain='%s',software='%s',origin='%s',nodeinfo_url='%s'", row[0], row[2], row[1], row[3])
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])

            logger.debug("Invoking cookies.clear(%s) ...", row[0])
            cookies.clear(row[0])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace):
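    """
    Downloads the CSV blocklists from oliphant's Codeberg repository and
    processes every wanted domain found in them. With args.domain set, only
    the matching blocker's list is fetched.
    """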
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block['blocker'], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block['csv_url'], block['blocker'])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.content != "":
            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

            logger.debug("reader[]='%s'", type(reader))
            for row in reader:
                domain = None
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug("row='%s' does not contain a domain column", row)
                    continue

                logger.debug("domain='%s'", domain)
                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("Marking domain='%s' as handled", domain)
                domains.append(domain)

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)

    logger.debug("EXIT!")

def fetch_txt(args: argparse.Namespace):
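    """
    Fetches static plain-text blocklists (currently seirdy.one's bsl.txt) and
    processes every wanted domain listed in them.
    """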
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    logger.info("Checking %d text file(s) ...", len(urls))
    for url in urls:
        logger.debug("Fetching url='%s' ...", url)
        response = utils.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("domain='%s'", domain)
                processed = utils.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("EXIT!")

def fetch_fedipact(args: argparse.Namespace):
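    """
    Scrapes the instance list from fedipact.online and registers every new,
    wanted domain found in it.
    """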
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("EXIT!")