# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
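    """Checks a single domain given via --domain: reports whether it is
    syntactically invalid, blacklisted or already registered, and returns a
    matching status code (0 means unknown, i.e. safe to add)."""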
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
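    """Compares each instance's stored nodeinfo_url against its domain name
    (including the punycode form) and counts mismatches."""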
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt += 1

    logger.info("Found %d mismatching row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
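    """Fetches the server list from the pixelfed.org API and queues all new,
    wanted domains for instance fetching."""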
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, there is no need to add network.source_headers here
    headers = dict()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
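    """Queries the gql.api.bka.li GraphQL API for a domain list and fetches
    instances from each new, wanted domain."""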
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows(data)()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
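    """Fetches block lists from registered instances (optionally restricted to
    a single domain or software), deobfuscates obscured entries where possible
    and records the resulting blocks."""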
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("blocker='%s'", blocker)
        if blocker != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
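    """Scrapes the per-software instance tables from fediverse.observer and
    fetches instances from each listed domain."""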
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s'", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
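    """Parses the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks issued by todon.eu."""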
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blocked_domains = blocklist[block_level]

        logger.debug("block_level='%s',blocked_domains()=%d", block_level, len(blocked_domains))
        for blocked in blocked_domains:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
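    """Parses chaos.social's federation.md from raw.githubusercontent.com and
    records the silenced and blocked instances listed there."""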
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
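    """Parses an FBA-specific RSS feed given via --feed and fetches instances
    from each new domain found in the item links."""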
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
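    """Parses the FBA bot's Atom feed on ryona.agency and fetches instances
    from each domain linked in the entries."""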
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
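    """Fetches instances from the given --domain, then (unless --single is
    set) re-crawls known instances whose last fetch is older than the
    configured interval."""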
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
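    """Downloads the CSV block lists from the oliphant/blocklists repository
    on codeberg.org and records each entry's severity and reject flags."""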
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/gardenfence.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/birdsite.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or len(response.content) == 0:
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        blockdict = list()

        cnt = 0
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            cnt += 1
            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if block["blocker"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
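    """Fetches static text block lists (currently seirdy.one's bsl.txt) and
    processes each listed domain."""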
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

1180 def fetch_fedipact(args: argparse.Namespace) -> int:
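    """Scrape the participant list from fedipact.online and fetch instance
    data for every domain that is wanted, not yet registered and not
    recently crawled."""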
1181     logger.debug("args[]='%s' - CALLED!", type(args))
1182
1183     logger.debug("Invoking locking.acquire() ...")
1184     locking.acquire()
1185
1186     source_domain = "fedipact.online"
1187     if sources.is_recent(source_domain):
1188         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1189         return 0
1190     else:
1191         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1192         sources.update(source_domain)
1193
1194     response = utils.fetch_url(
1195         f"https://{source_domain}",
1196         network.web_headers,
1197         (config.get("connection_timeout"), config.get("read_timeout"))
1198     )
1199
1200     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1201     if response.ok and response.status_code < 300 and response.text != "":
1202         logger.debug("Parsing %d Bytes ...", len(response.text))
1203
1204         doc = bs4.BeautifulSoup(response.text, "html.parser")
1205         logger.debug("doc[]='%s'", type(doc))
1206
1207         rows = doc.findAll("li")
1208         logger.info("Checking %d row(s) ...", len(rows))
1209         for row in rows:
1210             logger.debug("row[]='%s'", type(row))
1211             domain = tidyup.domain(row.contents[0])
1212
1213             logger.debug("domain='%s' - AFTER!", domain)
1214             if domain == "":
1215                 logger.debug("domain is empty - SKIPPED!")
1216                 continue
1217
1218             logger.debug("domain='%s' - BEFORE!", domain)
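            # Convert internationalized domain names to their punycode (IDNA) representation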
1219             domain = domain.encode("idna").decode("utf-8")
1220             logger.debug("domain='%s' - AFTER!", domain)
1221
1222             if not utils.is_domain_wanted(domain):
1223                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1224                 continue
1225             elif instances.is_registered(domain):
1226                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1227                 continue
1228             elif instances.is_recent(domain):
1229                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1230                 continue
1231
1232             logger.info("Fetching domain='%s' ...", domain)
1233             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1234
1235     logger.debug("Success! - EXIT!")
1236     return 0
1237
1238 def fetch_joinfediverse(args: argparse.Namespace) -> int:
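    """Scrape the FediBlock tables on joinfediverse.wiki, expand listed
    subdomains into full block entries and record each entry as a 'reject'
    block for the climatejustice.* instances found in the local database."""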
1239     logger.debug("args[]='%s' - CALLED!", type(args))
1240
1241     logger.debug("Invoking locking.acquire() ...")
1242     locking.acquire()
1243
1244     source_domain = "joinfediverse.wiki"
1245     if sources.is_recent(source_domain):
1246         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1247         return 0
1248     else:
1249         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1250         sources.update(source_domain)
1251
1252     raw = utils.fetch_url(
1253         f"https://{source_domain}/FediBlock",
1254         network.web_headers,
1255         (config.get("connection_timeout"), config.get("read_timeout"))
1256     ).text
1257     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1258
1259     doc = bs4.BeautifulSoup(raw, "html.parser")
1260     logger.debug("doc[]='%s'", type(doc))
1261
1262     tables = doc.findAll("table", {"class": "wikitable"})
1263
1264     logger.info("Analyzing %d table(s) ...", len(tables))
1265     blocklist = list()
1266     for table in tables:
1267         logger.debug("table[]='%s'", type(table))
1268
1269         rows = table.findAll("tr")
1270         logger.info("Checking %d row(s) ...", len(rows))
1271         block_headers = dict()
1272         for row in rows:
1273             logger.debug("row[%s]='%s'", type(row), row)
1274
1275             headers = row.findAll("th")
1276             logger.debug("Found headers()=%d header(s)", len(headers))
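            # Header rows define which column index maps to which field; data rows are parsed with that mapping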
1277             if len(headers) > 1:
1278                 block_headers = dict()
1279                 cnt = 0
1280                 for header in headers:
1281                     cnt = cnt + 1
1282                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1283                     text = header.contents[0]
1284
1285                     logger.debug("text[]='%s'", type(text))
1286                     if not isinstance(text, str):
1287                         logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1288                         continue
1289                     elif validators.domain(text.strip()):
1290                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1291                         continue
1292
1293                     text = tidyup.domain(text.strip())
1294                     logger.debug("text='%s'", text)
1295                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1296                         logger.debug("Found header: '%s'=%d", text, cnt)
1297                         block_headers[cnt] = text
1298
1299             elif len(block_headers) == 0:
1300                 logger.debug("row is not scrapable - SKIPPED!")
1301                 continue
1302             elif len(block_headers) > 0:
1303                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1304                 cnt = 0
1305                 block = dict()
1306
1307                 for element in row.find_all(["th", "td"]):
1308                     cnt = cnt + 1
1309                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1310                     if cnt in block_headers:
1311                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1312
1313                         text = element.text.strip()
1314                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1315
1316                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1317                         if key == "blocked":
1318                             block[key] = text
1319                         elif key == "block reason(s)":
1320                             block[key] = tidyup.reason(text)
1321                         elif key == "subdomain(s)":
1322                             block[key] = list()
1323                             if text != "":
1324                                 block[key] = text.split("/")
1325                         else:
1326                             logger.debug("key='%s'", key)
1327                             block[key] = text
1328
1329                 logger.debug("block()=%d ...", len(block))
1330                 if len(block) > 0:
1331                     logger.debug("Appending block()=%d ...", len(block))
1332                     blocklist.append(block)
1333
1334     logger.debug("blocklist()=%d", len(blocklist))
1335
1336     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1337     domains = database.cursor.fetchall()
1338
1339     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1340     blocking = list()
1341     for block in blocklist:
1342         logger.debug("block='%s'", block)
1343         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1344             origin = block["blocked"]
1345             logger.debug("origin='%s'", origin)
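            # Combine each listed subdomain with the base domain to form a separate block entry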
1346             for subdomain in block["subdomain(s)"]:
1347                 entry = {**block, "blocked": subdomain + "." + origin}
1348                 logger.debug("entry[blocked]='%s'", entry["blocked"])
1349                 blocking.append(entry)
1350         else:
1351             blocking.append(block)
1352
1353     logger.debug("blocking()=%d", len(blocking))
1354     for block in blocking:
1355         logger.debug("block[]='%s'", type(block))
1356         block["blocked"] = tidyup.domain(block["blocked"])
1357
1358         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1359         if block["blocked"] == "":
1360             logger.debug("block[blocked] is empty - SKIPPED!")
1361             continue
1362         elif not utils.is_domain_wanted(block["blocked"]):
1363             logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1364             continue
1365         elif instances.is_recent(block["blocked"]):
1366             logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1367             continue
1368
1369         logger.info("Processing blocked='%s' ...", block["blocked"])
1370         processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1371
1372     blockdict = list()
1373     for blocker in domains:
1374         blocker = blocker[0]
1375         logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1376
1377         for block in blocking:
1378             logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1379             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1380
1381             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1382             if block["blocked"] == "":
1383                 logger.debug("block[blocked] is empty - SKIPPED!")
1384                 continue
1385             elif not utils.is_domain_wanted(block["blocked"]):
1386                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1387                 continue
1388
1389             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1390             if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1391                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1392                 blockdict.append({
1393                     "blocked": block["blocked"],
1394                     "reason" : block["reason"],
1395                 })
1396
1397         if instances.has_pending(blocker):
1398             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1399             instances.update_data(blocker)
1400
1401         logger.debug("Invoking commit() ...")
1402         database.connection.commit()
1403
1404         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1405         if config.get("bot_enabled") and len(blockdict) > 0:
1406             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1407             network.send_bot_post(blocker, blockdict)
1408
1409     logger.debug("Success! - EXIT!")
1410     return 0
1411
1412 def recheck_obfuscation(args: argparse.Namespace) -> int:
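    """Re-fetch the block lists of instances flagged with has_obfuscation
    and try to deobfuscate masked entries; the flag is cleared once a list
    is fully deobfuscated."""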
1413     logger.debug("args[]='%s' - CALLED!", type(args))
1414
1415     logger.debug("Invoking locking.acquire() ...")
1416     locking.acquire()
1417
1418     if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1419         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1420     elif isinstance(args.software, str) and args.software != "":
1421         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1422     else:
1423         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1424
1425     rows = database.cursor.fetchall()
1426     logger.info("Checking %d domains ...", len(rows))
1427     for row in rows:
1428         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1429         if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1430             logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
1431             continue
1432
1433         blocking = list()
1434         if row["software"] == "pleroma":
1435             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1436             blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1437         elif row["software"] == "mastodon":
1438             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1439             blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1440         elif row["software"] == "lemmy":
1441             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1442             blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1443         elif row["software"] == "friendica":
1444             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1445             blocking = friendica.fetch_blocks(row["domain"])
1446         elif row["software"] == "misskey":
1447             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1448             blocking = misskey.fetch_blocks(row["domain"])
1449         else:
1450             logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1451
1452         logger.debug("row[domain]='%s'", row["domain"])
1453         if row["domain"] != "chaos.social":
1454             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1455             instances.set_total_blocks(row["domain"], blocking)
1456
1457         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1458         obfuscated = 0
1459         blockdict = list()
1460         for block in blocking:
1461             logger.debug("block[blocked]='%s'", block["blocked"])
1462             blocked = None
1463
1464             if block["blocked"] == "":
1465                 logger.debug("block[blocked] is empty - SKIPPED!")
1466                 continue
1467             elif block["blocked"].endswith(".arpa"):
1468                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1469                 continue
1470             elif block["blocked"].endswith(".tld"):
1471                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1472                 continue
1473             elif block["blocked"].endswith(".onion"):
1474                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1475                 continue
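            # Entries containing '*' or '?' are obfuscated; try to recover the real domain name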
1476             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1477                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1478                 obfuscated = obfuscated + 1
1479                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1480             elif not utils.is_domain_wanted(block["blocked"]):
1481                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1482                 continue
1483             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1484                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1485                 continue
1486
1487             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1488             if blocked is not None and blocked != block["blocked"]:
1489                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1490                 obfuscated = obfuscated - 1
1491                 if blocks.is_instance_blocked(row["domain"], blocked):
1492                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1493                     continue
1494
1495                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1496
1497                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1498                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1499                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
1500                     blockdict.append({
1501                         "blocked": blocked,
1502                         "reason" : block["reason"],
1503                     })
1504
1505         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1506         if obfuscated == 0 and len(blocking) > 0:
1507             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1508             instances.set_has_obfuscation(row["domain"], False)
1509
1510         if instances.has_pending(row["domain"]):
1511             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1512             instances.update_data(row["domain"])
1513
1514         logger.debug("Invoking commit() ...")
1515         database.connection.commit()
1516
1517         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1518         if config.get("bot_enabled") and len(blockdict) > 0:
1519             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1520             network.send_bot_post(row["domain"], blockdict)
1521
1522     logger.debug("Success! - EXIT!")
1523     return 0
1524
1525 def fetch_fedilist(args: argparse.Namespace) -> int:
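    """Fetch the instance CSV from demo.fedilist.com (optionally filtered by
    --software) and fetch instance data for each wanted domain."""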
1526     logger.debug("args[]='%s' - CALLED!", type(args))
1527
1528     logger.debug("Invoking locking.acquire() ...")
1529     locking.acquire()
1530
1531     source_domain = "demo.fedilist.com"
1532     if sources.is_recent(source_domain):
1533         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1534         return 0
1535     else:
1536         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1537         sources.update(source_domain)
1538
1539     url = f"http://{source_domain}/instance/csv?onion=not"
1540     if args.software is not None and args.software != "":
1541         logger.debug("args.software='%s'", args.software)
1542         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1543
1544     logger.info("Fetching url='%s' ...", url)
1545     response = reqto.get(
1546         url,
1547         headers=network.web_headers,
1548         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1549         allow_redirects=False
1550     )
1551
1552     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1553     if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1554         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1555         return 1
1556
1557     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1558
1559     logger.debug("reader[]='%s'", type(reader))
1561     for row in reader:
1562         logger.debug("row[]='%s'", type(row))
1563         domain = tidyup.domain(row["hostname"])
1564         logger.debug("domain='%s' - AFTER!", domain)
1565
1566         if domain == "":
1567             logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1568             continue
1569
1570         logger.debug("domain='%s' - BEFORE!", domain)
1571         domain = domain.encode("idna").decode("utf-8")
1572         logger.debug("domain='%s' - AFTER!", domain)
1573
1574         if not utils.is_domain_wanted(domain):
1575             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1576             continue
1577         elif (args.all is None or not args.all) and instances.is_registered(domain):
1578             logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", domain, type(args.all))
1579             continue
1580         elif instances.is_recent(domain):
1581             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1582             continue
1583
1584         logger.info("Fetching instances from domain='%s' ...", domain)
1585         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1586
1587     logger.debug("Success! - EXIT!")
1588     return 0
1589
1590 def update_nodeinfo(args: argparse.Namespace) -> int:
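    """Re-determine the software type of instances (a single --domain, all of
    one --software, or those with stale nodeinfo data) and persist the
    result."""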
1591     logger.debug("args[]='%s' - CALLED!", type(args))
1592
1593     logger.debug("Invoking locking.acquire() ...")
1594     locking.acquire()
1595
1596     if args.domain is not None and args.domain != "":
1597         logger.debug("Fetching args.domain='%s'", args.domain)
1598         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1599     elif args.software is not None and args.software != "":
1600         logger.info("Fetching domains for args.software='%s'", args.software)
1601         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1602     else:
1603         logger.info("Fetching domains for recently updated ...")
1604         database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1605
1606     domains = database.cursor.fetchall()
1607
1608     logger.info("Checking %d domain(s) ...", len(domains))
1609     cnt = 0
1610     for row in domains:
1611         logger.debug("row[]='%s'", type(row))
1612         try:
1613             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1614             software = federation.determine_software(row["domain"])
1615
1616             logger.debug("Determined software='%s'", software)
1617             if (software != row["software"] and software is not None) or args.force is True:
1618                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1619                 instances.set_software(row["domain"], software)
1620
1621             instances.set_success(row["domain"])
1622         except network.exceptions as exception:
1623             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1624             instances.set_last_error(row["domain"], exception)
1625
1626         instances.set_last_nodeinfo(row["domain"])
1627         instances.update_data(row["domain"])
1628         cnt = cnt + 1
1629
1630     logger.debug("Success! - EXIT!")
1631     return 0
1632
1633 def fetch_instances_social(args: argparse.Namespace) -> int:
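    """Query the instances.social list API (requires instances_social_api_key
    in config.json) and fetch instance data for each new, wanted domain."""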
1634     logger.debug("args[]='%s' - CALLED!", type(args))
1635
1636     logger.debug("Invoking locking.acquire() ...")
1637     locking.acquire()
1638
1639     source_domain = "instances.social"
1640
1641     if config.get("instances_social_api_key") == "":
1642         logger.error("API key not set. Please set in your config.json file.")
1643         return 1
1644     elif sources.is_recent(source_domain):
1645         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1646         return 0
1647     else:
1648         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1649         sources.update(source_domain)
1650
1651     headers = {
1652         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1653     }
1654
1655     fetched = network.get_json_api(
1656         source_domain,
1657         "/api/1.0/instances/list?count=0&sort_by=name",
1658         headers,
1659         (config.get("connection_timeout"), config.get("read_timeout"))
1660     )
1661     logger.debug("fetched[]='%s'", type(fetched))
1662
1663     if "error_message" in fetched:
1664         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1665         return 2
1666     elif "exception" in fetched:
1667         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1668         return 3
1669     elif "json" not in fetched:
1670         logger.warning("fetched has no element 'json' - EXIT!")
1671         return 4
1672     elif "instances" not in fetched["json"]:
1673         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1674         return 5
1675
1676     domains = list()
1677     rows = fetched["json"]["instances"]
1678
1679     logger.info("Checking %d row(s) ...", len(rows))
1680     for row in rows:
1681         logger.debug("row[]='%s'", type(row))
1682         domain = tidyup.domain(row["name"])
1683         logger.debug("domain='%s' - AFTER!", domain)
1684
1685         if domain == "":
1686             logger.debug("domain is empty - SKIPPED!")
1687             continue
1688
1689         logger.debug("domain='%s' - BEFORE!", domain)
1690         domain = domain.encode("idna").decode("utf-8")
1691         logger.debug("domain='%s' - AFTER!", domain)
1692
1693         if not utils.is_domain_wanted(domain):
1694             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1695             continue
1696         elif domain in domains:
1697             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1698             continue
1699         elif instances.is_registered(domain):
1700             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1701             continue
1702         elif instances.is_recent(domain):
1703             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1704             continue
1705
1706         logger.info("Fetching instances from domain='%s'", domain)
1707         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1708
1709     logger.debug("Success! - EXIT!")
1710     return 0
1711
1712 def convert_idna(args: argparse.Namespace) -> int:
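    """Convert non-punycode values in instances.domain, instances.origin,
    blocks.blocker and blocks.blocked to their IDNA (punycode) form."""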
1713     logger.debug("args[]='%s' - CALLED!", type(args))
1714
1715     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1716     rows = database.cursor.fetchall()
1717
1718     logger.debug("rows[]='%s'", type(rows))
1719     instances.translate_idnas(rows, "domain")
1720
1721     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1722     rows = database.cursor.fetchall()
1723
1724     logger.debug("rows[]='%s'", type(rows))
1725     instances.translate_idnas(rows, "origin")
1726
1727     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1728     rows = database.cursor.fetchall()
1729
1730     logger.debug("rows[]='%s'", type(rows))
1731     blocks.translate_idnas(rows, "blocker")
1732
1733     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1734     rows = database.cursor.fetchall()
1735
1736     logger.debug("rows[]='%s'", type(rows))
1737     blocks.translate_idnas(rows, "blocked")
1738
1739     logger.debug("Success! - EXIT!")
1740     return 0