# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
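    """Check a single domain given as args.domain: report whether it is
    syntactically invalid, blacklisted or already registered. Returns 0 when
    the domain is not yet known, otherwise a non-zero status code."""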
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
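    """Report registered instances whose stored nodeinfo URL contains neither
    their domain nor its punycode form. Relative nodeinfo URLs always match."""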
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d mismatching row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
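    """Fetch the server list from the pixelfed.org API and fetch instance data
    for every new, wanted domain found in it."""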
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default; there is no need to supply network.source_headers here.
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
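    """Fetch a domain list from the gql.api.bka.li GraphQL API and fetch
    instance data for every new, wanted domain."""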
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
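    """Fetch block lists from registered instances, optionally restricted to a
    single domain (args.domain) or software type (args.software), and record
    all blocks found."""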
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("blocker='%s'", blocker)
        if blocker != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without a hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some servers obscure domains with question marks; it is unclear whether this depends on the version
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
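    """Fetch instance tables from fediverse.observer, optionally restricted to
    one software type (args.software), and fetch instance data for every new,
    wanted domain."""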
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s'", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
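    """Fetch the silenced/limited and suspended server lists from wiki.todon.eu
    and record them as blocks by todon.eu."""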
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blocked_domains = blocklist[block_level]

        logger.debug("block_level='%s',blocked_domains()=%d", block_level, len(blocked_domains))
        for blocked in blocked_domains:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
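    """Fetch chaos.social's federation.md from raw.githubusercontent.com, parse
    its silenced and blocked tables and record them as blocks by chaos.social."""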
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
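    """Fetch an FBA-specific RSS feed (args.feed) and fetch instance data for
    every new, wanted domain found in the item links."""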
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
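    """Fetch the FBA bot's Atom feed from ryona.agency and fetch instance data
    for every new, wanted domain linked in the entries."""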
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
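    """Fetch instance data starting from args.domain, then, unless args.single
    is set, re-crawl known instances whose last fetch is older than the
    configured 'recheck_instance' interval."""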
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
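    """Download the CSV block lists from the oliphant/blocklists repository on
    codeberg.org and record the blocks they contain."""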
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/gardenfence.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/birdsite.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and does it not match the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        blockdict = list()

        cnt = 0
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            cnt = cnt + 1
            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if block["blocker"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
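    """Fetch plain-text block lists from a static list of URLs (currently only
    seirdy.one) and process each listed domain."""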
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

1179 def fetch_fedipact(args: argparse.Namespace) -> int:
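    # Scrapes the FediPact page, which lists one participating instance per <li> element, and fetches instance data for each unknown domain.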
1180     logger.debug("args[]='%s' - CALLED!", type(args))
1181
1182     logger.debug("Invoking locking.acquire() ...")
1183     locking.acquire()
1184
1185     source_domain = "fedipact.online"
1186     if sources.is_recent(source_domain):
1187         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1188         return 0
1189     else:
1190         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1191         sources.update(source_domain)
1192
1193     response = utils.fetch_url(
1194         f"https://{source_domain}",
1195         network.web_headers,
1196         (config.get("connection_timeout"), config.get("read_timeout"))
1197     )
1198
1199     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1200     if response.ok and response.status_code < 300 and response.text != "":
1201         logger.debug("Parsing %d Bytes ...", len(response.text))
1202
1203         doc = bs4.BeautifulSoup(response.text, "html.parser")
1204         logger.debug("doc[]='%s'", type(doc))
1205
1206         rows = doc.findAll("li")
1207         logger.info("Checking %d row(s) ...", len(rows))
1208         for row in rows:
1209             logger.debug("row[]='%s'", type(row))
1210             domain = tidyup.domain(row.contents[0])
1211
1212             logger.debug("domain='%s' - AFTER!", domain)
1213             if domain == "":
1214                 logger.debug("domain is empty - SKIPPED!")
1215                 continue
1216
1217             logger.debug("domain='%s' - BEFORE!", domain)
1218             domain = domain.encode("idna").decode("utf-8")
1219             logger.debug("domain='%s' - AFTER!", domain)
1220
1221             if not utils.is_domain_wanted(domain):
1222                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1223                 continue
1224             elif instances.is_registered(domain):
1225                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1226                 continue
1227             elif instances.is_recent(domain):
1228                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1229                 continue
1230
1231             logger.info("Fetching domain='%s' ...", domain)
1232             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1233
1234     logger.debug("Success! - EXIT!")
1235     return 0
1236
1237 def fetch_joinfediverse(args: argparse.Namespace) -> int:
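    # Scrapes the "wikitable" tables on the joinfediverse.wiki FediBlock page and turns their rows into block records.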
1238     logger.debug("args[]='%s' - CALLED!", type(args))
1239
1240     logger.debug("Invoking locking.acquire() ...")
1241     locking.acquire()
1242
1243     source_domain = "joinfediverse.wiki"
1244     if sources.is_recent(source_domain):
1245         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1246         return 0
1247     else:
1248         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1249         sources.update(source_domain)
1250
1251     raw = utils.fetch_url(
1252         f"https://{source_domain}/FediBlock",
1253         network.web_headers,
1254         (config.get("connection_timeout"), config.get("read_timeout"))
1255     ).text
1256     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1257
1258     doc = bs4.BeautifulSoup(raw, "html.parser")
1259     logger.debug("doc[]='%s'", type(doc))
1260
1261     tables = doc.findAll("table", {"class": "wikitable"})
1262
1263     logger.info("Analyzing %d table(s) ...", len(tables))
1264     blocklist = list()
1265     for table in tables:
1266         logger.debug("table[]='%s'", type(table))
1267
1268         rows = table.findAll("tr")
1269         logger.info("Checking %d row(s) ...", len(rows))
1270         block_headers = dict()
1271         for row in rows:
1272             logger.debug("row[%s]='%s'", type(row), row)
1273
1274             headers = row.findAll("th")
1275             logger.debug("Found headers()=%d header(s)", len(headers))
1276             if len(headers) > 1:
1277                 block_headers = dict()
1278                 cnt = 0
1279                 for header in headers:
1280                     cnt = cnt + 1
1281                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1282                     text = header.contents[0]
1283
1284                     logger.debug("text[]='%s'", type(text))
1285                     if not isinstance(text, str):
1286                         logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1287                         continue
1288                     elif validators.domain(text.strip()):
1289                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1290                         continue
1291
1292                     text = tidyup.domain(text.strip())
1293                     logger.debug("text='%s' - AFTER!", text)
1294                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1295                         logger.debug("Found header: '%s'=%d", text, cnt)
1296                         block_headers[cnt] = text
1297
1298             elif len(block_headers) == 0:
1299                 logger.debug("row is not scrapable - SKIPPED!")
1300                 continue
1301             else:
1302                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1303                 cnt = 0
1304                 block = dict()
1305
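                # block_headers maps the 1-based cell index to its scrapable key; only cells under wanted columns are copied into the block record.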
1306                 for element in row.find_all(["th", "td"]):
1307                     cnt = cnt + 1
1308                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1309                     if cnt in block_headers:
1310                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1311
1312                         text = element.text.strip()
1313                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1314
1315                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1316                         if key == "blocked":
1317                             block[key] = text
1318                         elif key == "block reason(s)":
1319                             block[key] = tidyup.reason(text)
1320                         elif key == "subdomain(s)":
1321                             block[key] = list()
1322                             if text != "":
1323                                 block[key] = text.split("/")
1324                         else:
1325                             logger.debug("key='%s'", key)
1326                             block[key] = text
1327
1328                 logger.debug("block()=%d ...", len(block))
1329                 if len(block) > 0:
1330                     logger.debug("Appending block()=%d ...", len(block))
1331                     blocklist.append(block)
1332
1333     logger.debug("blocklist()=%d", len(blocklist))
1334
1335     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1336     domains = database.cursor.fetchall()
1337
1338     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1339     blocking = list()
1340     for block in blocklist:
1341         logger.debug("block='%s'", block)
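        # A row may list subdomains separately; expand each one into its own fully qualified entry.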
1342         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1343             origin = block["blocked"]
1344             logger.debug("origin='%s'", origin)
1345             for subdomain in block["subdomain(s)"]:
1346                 entry = {**block, "blocked": subdomain + "." + origin}
1347                 logger.debug("entry[blocked]='%s'", entry["blocked"])
1348                 blocking.append(entry)
1349         else:
1350             blocking.append(block)
1351
1352     logger.debug("blocking()=%d", len(blocking))
1353     for block in blocking:
1354         logger.debug("block[]='%s'", type(block))
1355         if "blocked" not in block:
1356             raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1357
1358         block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1359         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1360
1361         if block["blocked"] == "":
1362             logger.debug("block[blocked] is empty - SKIPPED!")
1363             continue
1364         elif not utils.is_domain_wanted(block["blocked"]):
1365             logger.warning("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1366             continue
1367         elif instances.is_recent(block["blocked"]):
1368             logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1369             continue
1370
1371         logger.info("Processing blocked='%s' ...", block["blocked"])
1372         processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1373
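    # Every climatejustice.* instance selected above is treated as the blocker for the scraped entries.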
1374     blockdict = list()
1375     for blocker in domains:
1376         blocker = blocker[0]
1377         logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1378
1379         for block in blocking:
1380             logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1381             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1382
1383             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1384             if block["blocked"] == "":
1385                 logger.debug("block[blocked] is empty - SKIPPED!")
1386                 continue
1387             elif not utils.is_domain_wanted(block["blocked"]):
1388                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1389                 continue
1390
1391             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1392             if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1393                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1394                 blockdict.append({
1395                     "blocked": block["blocked"],
1396                     "reason" : block["reason"],
1397                 })
1398
1399         if instances.has_pending(blocker):
1400             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1401             instances.update_data(blocker)
1402
1403         logger.debug("Invoking commit() ...")
1404         database.connection.commit()
1405
1406         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1407         if config.get("bot_enabled") and len(blockdict) > 0:
1408             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1409             network.send_bot_post(blocker, blockdict)
1410
1411     logger.debug("Success! - EXIT!")
1412     return 0
1413
1414 def recheck_obfuscation(args: argparse.Namespace) -> int:
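    # Re-fetches block lists from instances flagged with has_obfuscation = 1 and tries to deobfuscate wildcard entries; scope can be narrowed with --domain or --software, or forced past the recency check with --all.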
1415     logger.debug("args[]='%s' - CALLED!", type(args))
1416
1417     logger.debug("Invoking locking.acquire() ...")
1418     locking.acquire()
1419
1420     if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1421         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1422     elif isinstance(args.software, str) and args.software != "":
1423         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1424     else:
1425         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1426
1427     rows = database.cursor.fetchall()
1428     logger.info("Checking %d domains ...", len(rows))
1429     for row in rows:
1430         logger.debug("Fetching blocks from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1431         if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1432             logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
1433             continue
1434
1435         blocking = list()
1436         if row["software"] == "pleroma":
1437             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1438             blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1439         elif row["software"] == "mastodon":
1440             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1441             blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1442         elif row["software"] == "lemmy":
1443             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1444             blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1445         elif row["software"] == "friendica":
1446             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1447             blocking = friendica.fetch_blocks(row["domain"])
1448         elif row["software"] == "misskey":
1449             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1450             blocking = misskey.fetch_blocks(row["domain"])
1451         else:
1452             logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1453
1454         logger.debug("row[domain]='%s'", row["domain"])
1455         if row["domain"] != "chaos.social":
1456             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1457             instances.set_total_blocks(row["domain"], blocking)
1458
1459         obfuscated = 0
1460         blockdict = list()
1461
1462         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1463         for block in blocking:
1464             logger.debug("block[blocked]='%s'", block["blocked"])
1465             blocked = None
1466
1467             if block["blocked"] == "":
1468                 logger.debug("block[blocked] is empty - SKIPPED!")
1469                 continue
1470             elif block["blocked"].endswith(".arpa"):
1471                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1472                 continue
1473             elif block["blocked"].endswith(".tld"):
1474                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1475                 continue
1476             elif block["blocked"].endswith(".onion"):
1477                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1478                 continue
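            # Entries containing '*' or '?' are obfuscated; count them and attempt deobfuscation, passing the entry's hash along when one is provided.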
1479             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1480                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1481                 obfuscated = obfuscated + 1
1482                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1483             elif not utils.is_domain_wanted(block["blocked"]):
1484                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1485                 continue
1486             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1487                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1488                 continue
1489
1490             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1491             if blocked is not None and blocked != block["blocked"]:
1492                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1493                 obfuscated = obfuscated - 1
1494                 if blocks.is_instance_blocked(row["domain"], blocked):
1495                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1496                     continue
1497
1498                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1499
1500                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1501                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1502                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
1503                     blockdict.append({
1504                         "blocked": blocked,
1505                         "reason" : block["reason"],
1506                     })
1507
1508         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1509         if obfuscated == 0 and len(blocking) > 0:
1510             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1511             instances.set_has_obfuscation(row["domain"], False)
1512
1513         if instances.has_pending(row["domain"]):
1514             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1515             instances.update_data(row["domain"])
1516
1517         logger.debug("Invoking commit() ...")
1518         database.connection.commit()
1519
1520         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1521         if config.get("bot_enabled") and len(blockdict) > 0:
1522             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1523             network.send_bot_post(row["domain"], blockdict)
1524
1525     logger.debug("Success! - EXIT!")
1526     return 0
1527
1528 def fetch_fedilist(args: argparse.Namespace) -> int:
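    # Pulls the CSV instance export from demo.fedilist.com (optionally filtered by software) and fetches instance data for each new domain.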
1529     logger.debug("args[]='%s' - CALLED!", type(args))
1530
1531     logger.debug("Invoking locking.acquire() ...")
1532     locking.acquire()
1533
1534     source_domain = "demo.fedilist.com"
1535     if sources.is_recent(source_domain):
1536         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1537         return 0
1538     else:
1539         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1540         sources.update(source_domain)
1541
1542     url = f"http://{source_domain}/instance/csv?onion=not"
1543     if args.software is not None and args.software != "":
1544         logger.debug("args.software='%s'", args.software)
1545         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1546
1547     logger.info("Fetching url='%s' ...", url)
1548     response = reqto.get(
1549         url,
1550         headers=network.web_headers,
1551         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1552         allow_redirects=False
1553     )
1554
1555     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1556     if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1557         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1558         return 1
1559
1560     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
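    # Each CSV row becomes a dict keyed by the header row; only the "hostname" column is used here.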
1561
1562     logger.debug("reader[]='%s'", type(reader))
1563     for row in reader:
1564         logger.debug("row[]='%s'", type(row))
1565         domain = tidyup.domain(row["hostname"])
1566         logger.debug("domain='%s' - AFTER!", domain)
1567
1568         if domain == "":
1569             logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1570             continue
1571
1572         logger.debug("domain='%s' - BEFORE!", domain)
1573         domain = domain.encode("idna").decode("utf-8")
1574         logger.debug("domain='%s' - AFTER!", domain)
1575
1576         if not utils.is_domain_wanted(domain):
1577             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1578             continue
1579         elif (args.all is None or not args.all) and instances.is_registered(domain):
1580             logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", domain, type(args.all))
1581             continue
1582         elif instances.is_recent(domain):
1583             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1584             continue
1585
1586         logger.info("Fetching instances from domain='%s' ...", domain)
1587         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1588
1589     logger.debug("Success! - EXIT!")
1590     return 0
1591
1592 def update_nodeinfo(args: argparse.Namespace) -> int:
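    # Re-determines the software type for instances selected by --domain, --software, or nodeinfo age, and records any change.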
1593     logger.debug("args[]='%s' - CALLED!", type(args))
1594
1595     logger.debug("Invoking locking.acquire() ...")
1596     locking.acquire()
1597
1598     if args.domain is not None and args.domain != "":
1599         logger.debug("Fetching args.domain='%s'", args.domain)
1600         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1601     elif args.software is not None and args.software != "":
1602         logger.info("Fetching domains for args.software='%s'", args.software)
1603         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1604     else:
1605         logger.info("Fetching domains for recently updated ...")
1606         database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1607
1608     domains = database.cursor.fetchall()
1609
1610     logger.info("Checking %d domain(s) ...", len(domains))
1611     cnt = 0
1612     for row in domains:
1613         logger.debug("row[]='%s'", type(row))
1614         try:
1615             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1616             software = federation.determine_software(row["domain"])
1617
1618             logger.debug("Determined software='%s'", software)
1619             if (software != row["software"] and software is not None) or args.force is True:
1620                 logger.warning("Updating software type for row[domain]='%s' from '%s' to '%s' ...", row["domain"], row["software"], software)
1621                 instances.set_software(row["domain"], software)
1622
1623             instances.set_success(row["domain"])
1624         except network.exceptions as exception:
1625             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1626             instances.set_last_error(row["domain"], exception)
1627
1628         instances.set_last_nodeinfo(row["domain"])
1629         instances.update_data(row["domain"])
1630         cnt = cnt + 1
1631
1632     logger.debug("Success! - EXIT!")
1633     return 0
1634
1635 def fetch_instances_social(args: argparse.Namespace) -> int:
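    # Queries the instances.social list API (requires instances_social_api_key in config.json) and fetches instance data for unknown domains.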
1636     logger.debug("args[]='%s' - CALLED!", type(args))
1637
1638     logger.debug("Invoking locking.acquire() ...")
1639     locking.acquire()
1640
1641     source_domain = "instances.social"
1642
1643     if config.get("instances_social_api_key") == "":
1644         logger.error("API key not set. Please set in your config.json file.")
1645         return 1
1646     elif sources.is_recent(source_domain):
1647         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1648         return 0
1649     else:
1650         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1651         sources.update(source_domain)
1652
1653     headers = {
1654         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1655     }
1656
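    # count=0 requests an unlimited list, sorted by instance name.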
1657     fetched = network.get_json_api(
1658         source_domain,
1659         "/api/1.0/instances/list?count=0&sort_by=name",
1660         headers,
1661         (config.get("connection_timeout"), config.get("read_timeout"))
1662     )
1663     logger.debug("fetched[]='%s'", type(fetched))
1664
1665     if "error_message" in fetched:
1666         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1667         return 2
1668     elif "exception" in fetched:
1669         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1670         return 3
1671     elif "json" not in fetched:
1672         logger.warning("fetched has no element 'json' - EXIT!")
1673         return 4
1674     elif "instances" not in fetched["json"]:
1675         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1676         return 5
1677
1678     domains = list()
1679     rows = fetched["json"]["instances"]
1680
1681     logger.info("Checking %d row(s) ...", len(rows))
1682     for row in rows:
1683         logger.debug("row[]='%s'", type(row))
1684         domain = tidyup.domain(row["name"])
1685         logger.debug("domain='%s' - AFTER!", domain)
1686
1687         if domain == "":
1688             logger.debug("domain is empty - SKIPPED!")
1689             continue
1690
1691         logger.debug("domain='%s' - BEFORE!", domain)
1692         domain = domain.encode("idna").decode("utf-8")
1693         logger.debug("domain='%s' - AFTER!", domain)
1694
1695         if not utils.is_domain_wanted(domain):
1696             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1697             continue
1698         elif domain in domains:
1699             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1700             continue
1701         elif instances.is_registered(domain):
1702             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1703             continue
1704         elif instances.is_recent(domain):
1705             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1706             continue
1707
1708         logger.info("Fetching instances from domain='%s'", domain)
1709         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1710
1711     logger.debug("Success! - EXIT!")
1712     return 0
1713
1714 def convert_idna(args: argparse.Namespace) -> int:
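    # Maintenance command: converts any still-Unicode domain values (rows without an 'xn--' label) in the instances and blocks tables to their IDNA form.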
1715     logger.debug("args[]='%s' - CALLED!", type(args))
1716
1717     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1718     rows = database.cursor.fetchall()
1719
1720     logger.debug("rows[]='%s'", type(rows))
1721     instances.translate_idnas(rows, "domain")
1722
1723     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1724     rows = database.cursor.fetchall()
1725
1726     logger.debug("rows[]='%s'", type(rows))
1727     instances.translate_idnas(rows, "origin")
1728
1729     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1730     rows = database.cursor.fetchall()
1731
1732     logger.debug("rows[]='%s'", type(rows))
1733     blocks.translate_idnas(rows, "blocker")
1734
1735     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1736     rows = database.cursor.fetchall()
1737
1738     logger.debug("rows[]='%s'", type(rows))
1739     blocks.translate_idnas(rows, "blocked")
1740
1741     logger.debug("Success! - EXIT!")
1742     return 0