# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def check_instance(args: argparse.Namespace) -> int:
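    """
    Checks whether args.domain could be added as a new instance. Returns 0
    when the domain is not yet known, 100 when it is not a valid domain name,
    101 when it is blacklisted and 102 when it is already registered.
    """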
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def fetch_bkali(args: argparse.Namespace) -> int:
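    """
    Fetches a domain list from the GraphQL API at gql.api.bka.li and registers
    all new, wanted domains. Returns 0 on success, 100/101 on API errors and
    102 on network errors.
    """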
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("domain='%s' is already registered - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.error("Cannot fetch graphql, exception[%s]: '%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace):
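    """
    Fetches block lists from registered instances. With args.domain or
    args.software set, only matching rows are re-checked; otherwise all
    supported instances whose last check is older than the 'recheck_block'
    interval. New blocks are added, known ones get their last-seen timestamp
    and reason updated.
    """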
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("BEFORE blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blockdict = list()
        blocker = tidyup.domain(blocker)
        logger.debug("AFTER blocker='%s',software='%s'", blocker, software)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)

        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
        elif software == "friendica" or software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)

            blocking = dict()
            if software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)

            logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
            for block_level, blocklist in blocking.items():
                logger.debug("blocker='%s',block_level='%s',blocklist()=%d", blocker, block_level, len(blocklist))
                block_level = tidyup.domain(block_level)
                logger.debug("AFTER-block_level='%s'", block_level)
                if block_level == "":
                    logger.warning("block_level is empty, blocker='%s'", blocker)
                    continue

                logger.debug("Checking %d entries from blocker='%s',software='%s',block_level='%s' ...", len(blocklist), blocker, software, block_level)
                for block in blocklist:
                    blocked, reason = block.values()
                    logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason)
                    blocked = tidyup.domain(blocked)
                    reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                    logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                    if blocked == "":
                        logger.warning("blocked is empty, blocker='%s'", blocker)
                        continue
                    elif blacklist.is_blacklisted(blocked):
                        logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                        continue
                    elif blocked.count("*") > 0:
                        # Some friendica servers also obscure domains without a hash
                        row = instances.deobscure("*", blocked)

                        logger.debug("row[]='%s'", type(row))
                        if row is None:
                            logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", blocked, blocker, software)
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]
                    elif blocked.count("?") > 0:
                        # Some obscure them with question marks; not sure if that depends on the version
                        row = instances.deobscure("?", blocked)

                        logger.debug("row[]='%s'", type(row))
                        if row is None:
                            logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", blocked, blocker, software)
                            continue

                        blocked      = row[0]
                        origin       = row[1]
                        nodeinfo_url = row[2]

                    logger.debug("Looking up instance by domain, blocked='%s'", blocked)
                    if not utils.is_domain_wanted(blocked):
                        logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue
                    elif not instances.is_registered(blocked):
                        logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", blocked, blocker)
                        try:
                            instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
                        except network.exceptions as exception:
                            logger.warning("Exception '%s' during adding blocked='%s',blocker='%s'", type(exception), blocked, blocker)
                            continue

                    if not blocks.is_instance_blocked(blocker, blocked, block_level):
                        blocks.add_instance(blocker, blocked, reason, block_level)

                        if block_level == "reject":
                            blockdict.append({
                                "blocked": blocked,
                                "reason" : reason
                            })
                    else:
                        logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, blocked)
                        blocks.update_last_seen(blocker, blocked, block_level)
                        blocks.update_reason(reason, blocker, blocked, block_level)

                    logger.debug("Invoking cookies.clear(%s) ...", blocked)
                    cookies.clear(blocked)

            logger.debug("Invoking commit() ...")
            database.connection.commit()
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        if instances.has_pending(blocker):
            logger.debug("Invoking instances.update_data(%s) ...", blocker)
            instances.update_data(blocker)

        if config.get("bot_enabled") and len(blockdict) > 0:
            network.send_bot_post(blocker, blockdict)

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

    logger.debug("EXIT!")

def fetch_observer(args: argparse.Namespace):
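    """
    Fetches per-software instance tables from fediverse.observer and registers
    all new, wanted domains found there.
    """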
    logger.debug("args[]='%s' - CALLED!", type(args))
    types = [
        "akkoma",
        "birdsitelive",
        "bookwyrm",
        "calckey",
        "diaspora",
        "foundkey",
        "friendica",
        "funkwhale",
        "gancio",
        "gnusocial",
        "gotosocial",
        "hometown",
        "hubzilla",
        "kbin",
        "ktistec",
        "lemmy",
        "mastodon",
        "microblogpub",
        "misskey",
        "mitra",
        "mobilizon",
        "owncast",
        "peertube",
        "pixelfed",
        "pleroma",
        "plume",
        "snac",
        "takahe",
        "wildebeest",
        "writefreely"
    ]

    locking.acquire()

    logger.info("Fetching table data for %d software types ...", len(types))
    for software in types:
        doc = None

        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s',software='%s'", domain, software)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

            logger.debug("Invoking cookies.clear(%s) ...", domain)
            cookies.clear(domain)

    logger.debug("EXIT!")

def fetch_todon_wiki(args: argparse.Namespace):
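    """
    Fetches the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks by todon.eu.
    """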
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, "todon.eu", None, inspect.currentframe().f_code.co_name)

                    logger.debug("Invoking cookies.clear(%s) ...", blocked)
                    cookies.clear(blocked)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            blocks.add_instance("todon.eu", blocked, None, block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("EXIT!")

def fetch_cs(args: argparse.Namespace):
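    """
    Fetches chaos.social's federation.md from GitHub, renders the Markdown and
    records the silenced/blocked tables as blocks by chaos.social.
    """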
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).find_next("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).find_next("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row='%s'", row)
                if not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], "chaos.social", None, inspect.currentframe().f_code.co_name)

                        logger.debug("Invoking cookies.clear(%s) ...", row["domain"])
                        cookies.clear(row["domain"])
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if not blocks.is_instance_blocked("chaos.social", row["domain"], block_level):
                    logger.debug("Adding new block by chaos.social: domain='%s',block_level='%s' ...", row["domain"], block_level)
                    blocks.add_instance("chaos.social", row["domain"], row["reason"], block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("EXIT!")

def fetch_fba_rss(args: argparse.Namespace):
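    """
    Fetches an FBA-specific RSS feed (args.feed), extracts domains from the
    item links and registers all new ones.
    """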
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if blacklist.is_blacklisted(domain):
                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("EXIT!")

def fetch_fbabot_atom(args: argparse.Namespace):
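    """
    Fetches the ATOM feed of the FBA bot account on ryona.agency, extracts
    domains from linked entries and registers all new ones.
    """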
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if blacklist.is_blacklisted(domain):
                        logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains(%d)='%s'", len(domains), domains)
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

                logger.debug("Invoking cookies.clear(%s) ...", domain)
                cookies.clear(domain)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)

    logger.debug("EXIT!")

def fetch_instances(args: argparse.Namespace) -> int:
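    """
    Fetches instances from args.domain first, then, unless args.single is set,
    loops over all known instances whose last fetch is older than the
    'recheck_instance' interval. Returns 0 on success, 100 when the initial
    fetch fails.
    """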
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)

        logger.debug("Invoking cookies.clear(%s) ...", args.domain)
        cookies.clear(args.domain)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)

        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row[0])
        if blacklist.is_blacklisted(row[0]):
            logger.warning("domain is blacklisted: row[0]='%s'", row[0])
            continue

        try:
            logger.info("Fetching instances for instance '%s' ('%s') of origin='%s',nodeinfo_url='%s'", row[0], row[2], row[1], row[3])
            federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])

            logger.debug("Invoking cookies.clear(%s) ...", row[0])
            cookies.clear(row[0])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0])
            instances.set_last_error(row[0], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace):
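    """
    Fetches the CSV block lists from oliphant's blocklists repository on
    codeberg.org and processes every wanted domain found in them.
    """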
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.content != b"":
            logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
            reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

            logger.debug("reader[]='%s'", type(reader))
            for row in reader:
                domain = None
                if "#domain" in row:
                    domain = row["#domain"]
                elif "domain" in row:
                    domain = row["domain"]
                else:
                    logger.debug("row='%s' does not contain domain column - SKIPPED!", row)
                    continue

                if not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("Marking domain='%s' as handled", domain)
                domains.append(domain)

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)

    logger.debug("EXIT!")

def fetch_txt(args: argparse.Namespace):
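    """
    Fetches static text block lists (currently seirdy.one's bsl.txt) and
    processes every non-empty, wanted domain in them.
    """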
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = (
        "https://seirdy.one/pb/bsl.txt",
    )

    logger.info("Checking %d text file(s) ...", len(urls))
    for url in urls:
        logger.debug("Fetching url='%s' ...", url)
        response = utils.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s' ...", domain)
                processed = utils.process_domain(domain, "seirdy.one", inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("EXIT!")

def fetch_fedipact(args: argparse.Namespace):
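    """
    Fetches the list of participating instances from fedipact.online and
    registers all new, wanted domains.
    """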
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("EXIT!")