# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

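# Each public function below implements one sub-command: it receives the
# parsed argparse.Namespace and returns an exit code (0 on success, >= 100
# on errors). A minimal dispatcher sketch (hypothetical - the real wiring
# lives in the project's CLI entry point, not in this module):
#
#     import sys
#
#     parser = argparse.ArgumentParser()
#     subparsers = parser.add_subparsers(dest="command", required=True)
#     cmd = subparsers.add_parser("check_instance")
#     cmd.add_argument("--domain", required=True)
#     args = parser.parse_args()
#     sys.exit(check_instance(args))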
def check_instance(args: argparse.Namespace) -> int:
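    """Check whether args.domain could be added: returns 0 if it is unknown,
    100 if it is syntactically invalid, 101 if it is blacklisted and 102 if
    it is already registered."""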
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
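    """List instances whose stored nodeinfo_url points at a different domain
    than the instance itself; relative URLs always match and are skipped."""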
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
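    """Fetch the full server list from the pixelfed.org API and fetch
    instance data for every new, wanted domain found there."""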
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token is used by default, network.source_headers does not have to be added here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            row["domain"] = row["domain"].encode("idna").decode("utf-8")
            logger.debug("row[domain]='%s' - AFTER!", row["domain"])

            if not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch pixelfed API, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
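    """Fetch a domain list from the gql.api.bka.li GraphQL API and fetch
    instance data for every new, wanted domain."""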
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows[data]()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
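    """Fetch and record block lists from known instances, either for a single
    args.domain, a single args.software, or all supported instances whose
    last check is older than the configured 'recheck_block' interval."""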
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", blocker, block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
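    """Scrape per-software instance tables from fediverse.observer and fetch
    instance data for every new, wanted domain."""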
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        # Normalize the software name once per table, not once per found item
        software = software_helper.alias(software)

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s'", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
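    """Scrape the silenced/limited and suspended server lists from
    wiki.todon.eu and record them as blocks by todon.eu."""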
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blocked_domains = blocklist[block_level]

        logger.debug("block_level='%s',blocked_domains()=%d", block_level, len(blocked_domains))
        for blocked in blocked_domains:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
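    """Fetch chaos.social's federation.md from raw.githubusercontent.com,
    parse the silenced and blocked tables and record them as blocks by
    chaos.social."""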
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    blockdict = list()
    if len(blocking) > 0:
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], row["reason"], blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
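    """Parse an FBA-specific RSS feed given as args.feed and fetch instance
    data for every new, wanted domain found in the item links."""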
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("Feed from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
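    """Parse the FBA bot's Atom feed on ryona.agency and fetch instance data
    for every new, wanted domain linked from the feed entries."""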
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
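    """Fetch instance data for args.domain and then, unless args.single is
    set, re-crawl known instances whose last fetch is older than the
    configured 'recheck_instance' interval."""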
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        # Database rows may be read-only (e.g. sqlite3.Row), so work on a local copy
        domain = row["domain"]
        if domain == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
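    """Download the CSV block lists from the oliphant/blocklists repository
    on codeberg.org and record each list's entries as blocks."""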
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/gardenfence.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/birdsite.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        blockdict = list()

        cnt = 0
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = utils.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = utils.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            cnt = cnt + 1
            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                utils.process_block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
        instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
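    """Fetch plain-text block lists from a static list of URLs (currently
    only seirdy.one's bsl.txt) and process every listed domain."""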
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

1175 def fetch_fedipact(args: argparse.Namespace) -> int:
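    """Scrapes the pledged instances from fedipact.online's list items and
    fetches instance data for every wanted, not yet registered domain."""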
1176     logger.debug("args[]='%s' - CALLED!", type(args))
1177
1178     logger.debug("Invoking locking.acquire() ...")
1179     locking.acquire()
1180
1181     source_domain = "fedipact.online"
1182     if sources.is_recent(source_domain):
1183         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1184         return 0
1185     else:
1186         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1187         sources.update(source_domain)
1188
1189     response = utils.fetch_url(
1190         f"https://{source_domain}",
1191         network.web_headers,
1192         (config.get("connection_timeout"), config.get("read_timeout"))
1193     )
1194
1195     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1196     if response.ok and response.status_code < 300 and response.text != "":
1197         logger.debug("Parsing %d Bytes ...", len(response.text))
1198
1199         doc = bs4.BeautifulSoup(response.text, "html.parser")
1200         logger.debug("doc[]='%s'", type(doc))
1201
1202         rows = doc.findAll("li")
1203         logger.info("Checking %d row(s) ...", len(rows))
1204         for row in rows:
1205             logger.debug("row[]='%s'", type(row))
1206             domain = tidyup.domain(row.contents[0])
1207
1208             logger.debug("domain='%s' - AFTER!", domain)
1209             if domain == "":
1210                 logger.debug("domain is empty - SKIPPED!")
1211                 continue
1212
1213             logger.debug("domain='%s' - BEFORE!", domain)
1214             domain = domain.encode("idna").decode("utf-8")
1215             logger.debug("domain='%s' - AFTER!", domain)
1216
1217             if not utils.is_domain_wanted(domain):
1218                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1219                 continue
1220             elif instances.is_registered(domain):
1221                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1222                 continue
1223             elif instances.is_recent(domain):
1224                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1225                 continue
1226
1227             logger.info("Fetching domain='%s' ...", domain)
1228             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1229
1230     logger.debug("Success! - EXIT!")
1231     return 0
1232
1233 def fetch_joinfediverse(args: argparse.Namespace) -> int:
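    """Scrapes the FediBlock table(s) from joinfediverse.wiki, expands any
    listed subdomain(s) into full hostnames and records the entries as
    'reject' blocks for the locally known climatejustice.* instances."""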
1234     logger.debug("args[]='%s' - CALLED!", type(args))
1235
1236     logger.debug("Invoking locking.acquire() ...")
1237     locking.acquire()
1238
1239     source_domain = "joinfediverse.wiki"
1240     if sources.is_recent(source_domain):
1241         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1242         return 0
1243     else:
1244         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1245         sources.update(source_domain)
1246
1247     raw = utils.fetch_url(
1248         f"https://{source_domain}/FediBlock",
1249         network.web_headers,
1250         (config.get("connection_timeout"), config.get("read_timeout"))
1251     ).text
1252     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1253
1254     doc = bs4.BeautifulSoup(raw, "html.parser")
1255     logger.debug("doc[]='%s'", type(doc))
1256
1257     tables = doc.findAll("table", {"class": "wikitable"})
1258
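    # The wiki page marks up its block list as one or more "wikitable"
    # tables; the loop below first learns the column layout from a header
    # row, then maps each data row's cells onto those columns.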
1259     logger.info("Analyzing %d table(s) ...", len(tables))
1260     blocklist = list()
1261     for table in tables:
1262         logger.debug("table[]='%s'", type(table))
1263
1264         rows = table.findAll("tr")
1265         logger.info("Checking %d row(s) ...", len(rows))
1266         block_headers = dict()
1267         for row in rows:
1268             logger.debug("row[%s]='%s'", type(row), row)
1269
1270             headers = row.findAll("th")
1271             logger.debug("Found headers()=%d header(s)", len(headers))
1272             if len(headers) > 1:
1273                 block_headers = dict()
1274                 cnt = 0
1275                 for header in headers:
1276                     cnt = cnt + 1
1277                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1278                     text = header.contents[0]
1279
1280                     logger.debug("text[]='%s'", type(text))
1281                     if not isinstance(text, str):
1282                         logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1283                         continue
1284                     elif validators.domain(text.strip()):
1285                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1286                         continue
1287
1288                     text = tidyup.domain(text.strip())
1289                     logger.debug("text='%s'", text)
1290                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1291                         logger.debug("Found header: '%s'=%d", text, cnt)
1292                         block_headers[cnt] = text
1293
1294             elif len(block_headers) == 0:
1295                 logger.debug("row is not scrapable - SKIPPED!")
1296                 continue
1297             elif len(block_headers) > 0:
1298                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1299                 cnt = 0
1300                 block = dict()
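                # Cells are matched to the previously scraped headers purely
                # by column position (cnt).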
1301
1302                 for element in row.find_all(["th", "td"]):
1303                     cnt = cnt + 1
1304                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1305                     if cnt in block_headers:
1306                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1307
1308                         text = element.text.strip()
1309                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1310
1311                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1312                         if key == "blocked":
1313                             block[key] = text
1314                         elif key == "block reason(s)":
1315                             block[key] = tidyup.reason(text)
1316                         elif key == "subdomain(s)":
1317                             block[key] = list()
1318                             if text != "":
1319                                 block[key] = text.split("/")
1320                         else:
1321                             logger.debug("key='%s'", key)
1322                             block[key] = text
1323
1324                 logger.debug("block()=%d ...", len(block))
1325                 if len(block) > 0:
1326                     logger.debug("Appending block()=%d ...", len(block))
1327                     blocklist.append(block)
1328
1329     logger.debug("blocklist()=%d", len(blocklist))
1330
1331     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1332     domains = database.cursor.fetchall()
1333
1334     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1335     blocking = list()
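    # Expand "subdomain(s)" entries into full hostnames; each subdomain gets
    # its own copy of the block entry so the entries do not alias each other.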
1336     for block in blocklist:
1337         logger.debug("block='%s'", block)
1338         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1339             origin = block["blocked"]
1340             logger.debug("origin='%s'", origin)
1341             for subdomain in block["subdomain(s)"]:
1342                 entry = dict(block, blocked=subdomain + "." + origin)
1343                 logger.debug("entry[blocked]='%s'", entry["blocked"])
1344                 blocking.append(entry)
1345         else:
1346             blocking.append(block)
1347
1348     logger.debug("blocking()=%d", len(blocking))
1349     for block in blocking:
1350         logger.debug("block[]='%s'", type(block))
1351         block["blocked"] = tidyup.domain(block["blocked"])
1352
1353         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1354         if block["blocked"] == "":
1355             logger.debug("block[blocked] is empty - SKIPPED!")
1356             continue
1357         elif not utils.is_domain_wanted(block["blocked"]):
1358             logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1359             continue
1360         elif instances.is_recent(block["blocked"]):
1361             logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1362             continue
1363
1364         logger.info("Processing blocked='%s' ...", block["blocked"])
1365         utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1366
1367     blockdict = list()
1368     for blocker in domains:
1369         blocker = blocker[0]
1370         logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1371
1372         for block in blocking:
1373             logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1374             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1375
1376             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1377             if block["blocked"] == "":
1378                 logger.debug("block[blocked] is empty - SKIPPED!")
1379                 continue
1380             elif not utils.is_domain_wanted(block["blocked"]):
1381                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1382                 continue
1383
1384             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1385             if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1386                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1387                 blockdict.append({
1388                     "blocked": block["blocked"],
1389                     "reason" : block["reason"],
1390                 })
1391
1392         if instances.has_pending(blocker):
1393             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1394             instances.update_data(blocker)
1395
1396         logger.debug("Invoking commit() ...")
1397         database.connection.commit()
1398
1399         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1400         if config.get("bot_enabled") and len(blockdict) > 0:
1401             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1402             network.send_bot_post(blocker, blockdict)
1403
1404     logger.debug("Success! - EXIT!")
1405     return 0
1406
1407 def recheck_obfuscation(args: argparse.Namespace) -> int:
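    """Re-fetches block lists from instances flagged with has_obfuscation=1
    and tries to deobfuscate entries containing '*' or '?'; once a list is
    fully resolved, the obfuscation flag is cleared again."""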
1408     logger.debug("args[]='%s' - CALLED!", type(args))
1409
1410     logger.debug("Invoking locking.acquire() ...")
1411     locking.acquire()
1412
1413     if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1414         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1415     elif isinstance(args.software, str) and args.software != "":
1416         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1417     else:
1418         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1419
1420     rows = database.cursor.fetchall()
1421     logger.info("Checking %d domains ...", len(rows))
1422     for row in rows:
1423         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1424         if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1425             logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
1426             continue
1427
1428         blocking = list()
1429         if row["software"] == "pleroma":
1430             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1431             blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1432         elif row["software"] == "mastodon":
1433             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1434             blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1435         elif row["software"] == "lemmy":
1436             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1437             blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1438         elif row["software"] == "friendica":
1439             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1440             blocking = friendica.fetch_blocks(row["domain"])
1441         elif row["software"] == "misskey":
1442             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1443             blocking = misskey.fetch_blocks(row["domain"])
1444         else:
1445             logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1446
1447         logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1448         instances.set_total_blocks(row["domain"], blocking)
1449
1450         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1451         obfuscated = 0
1452         blockdict = list()
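        # Every entry containing '*' or '?' counts as obfuscated; a
        # successful deobfuscation decrements the counter again, so 0 means
        # the whole list could be resolved.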
1453         for block in blocking:
1454             logger.debug("block[blocked]='%s'", block["blocked"])
1455             blocked = None
1456
1457             if block["blocked"] == "":
1458                 logger.debug("block[blocked] is empty - SKIPPED!")
1459                 continue
1460             elif block["blocked"].endswith(".arpa"):
1461                 logger.debug("blocked='%s' is a reverse DNS (.arpa) domain - SKIPPED!", block["blocked"])
1462                 continue
1463             elif block["blocked"].endswith(".tld"):
1464                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1465                 continue
1466             elif block["blocked"].endswith(".onion"):
1467                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1468                 continue
1469             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1470                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1471                 obfuscated = obfuscated + 1
1472                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1473             elif not utils.is_domain_wanted(block["blocked"]):
1474                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1475                 continue
1476             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1477                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1478                 continue
1479
1480             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1481             if blocked is not None and blocked != block["blocked"]:
1482                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1483                 obfuscated = obfuscated - 1
1484                 if blocks.is_instance_blocked(row["domain"], blocked):
1485                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1486                     continue
1487
1488                 block["block_level"] = utils.alias_block_level(block["block_level"])
1489
1490                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1491                 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1492                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
1493                     blockdict.append({
1494                         "blocked": blocked,
1495                         "reason" : block["reason"],
1496                     })
1497
1498         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1499         if obfuscated == 0 and len(blocking) > 0:
1500             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1501             instances.set_has_obfuscation(row["domain"], False)
1502
1503         if instances.has_pending(row["domain"]):
1504             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1505             instances.update_data(row["domain"])
1506
1507         logger.debug("Invoking commit() ...")
1508         database.connection.commit()
1509
1510         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1511         if config.get("bot_enabled") and len(blockdict) > 0:
1512             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1513             network.send_bot_post(row["domain"], blockdict)
1514
1515     logger.debug("Success! - EXIT!")
1516     return 0
1517
1518 def fetch_fedilist(args: argparse.Namespace) -> int:
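    """Downloads the instance list from demo.fedilist.com as CSV, optionally
    filtered by args.software, and crawls every wanted, not recently seen
    domain from its 'hostname' column."""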
1519     logger.debug("args[]='%s' - CALLED!", type(args))
1520
1521     logger.debug("Invoking locking.acquire() ...")
1522     locking.acquire()
1523
1524     source_domain = "demo.fedilist.com"
1525     if sources.is_recent(source_domain):
1526         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1527         return 0
1528     else:
1529         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1530         sources.update(source_domain)
1531
1532     url = f"http://{source_domain}/instance/csv?onion=not"
1533     if args.software is not None and args.software != "":
1534         logger.debug("args.software='%s'", args.software)
1535         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1536
1537     logger.info("Fetching url='%s' ...", url)
1538     response = reqto.get(
1539         url,
1540         headers=network.web_headers,
1541         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1542         allow_redirects=False
1543     )
1544
1545     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1546     if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1547         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1548         return 1
1549
1550     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
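    # fedilist's CSV export carries the instance name in a "hostname"
    # column, which DictReader exposes per row.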
1551
1552     logger.debug("reader[]='%s'", type(reader))
1553     blockdict = list()
1554     for row in reader:
1555         logger.debug("row[]='%s'", type(row))
1556         domain = tidyup.domain(row["hostname"])
1557         logger.debug("domain='%s' - AFTER!", domain)
1558
1559         if domain == "":
1560             logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1561             continue
1562
1563         logger.debug("domain='%s' - BEFORE!", domain)
1564         domain = domain.encode("idna").decode("utf-8")
1565         logger.debug("domain='%s' - AFTER!", domain)
1566
1567         if not utils.is_domain_wanted(domain):
1568             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1569             continue
1570         elif (args.all is None or not args.all) and instances.is_registered(domain):
1571             logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s' - SKIPPED!", domain, type(args.all))
1572             continue
1573         elif instances.is_recent(domain):
1574             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1575             continue
1576
1577         logger.info("Fetching instances from domain='%s' ...", domain)
1578         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1579
1580     logger.debug("Success! - EXIT!")
1581     return 0
1582
1583 def update_nodeinfo(args: argparse.Namespace) -> int:
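    """Redetermines the software type of instances selected by args.domain,
    args.software or a stale last_nodeinfo timestamp and persists any
    detected change."""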
1584     logger.debug("args[]='%s' - CALLED!", type(args))
1585
1586     logger.debug("Invoking locking.acquire() ...")
1587     locking.acquire()
1588
1589     if args.domain is not None and args.domain != "":
1590         logger.debug("Fetching args.domain='%s'", args.domain)
1591         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1592     elif args.software is not None and args.software != "":
1593         logger.info("Fetching domains for args.software='%s'", args.software)
1594         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1595     else:
1596         logger.info("Fetching domains for recently updated ...")
1597         database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
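        # "Stale" means last_nodeinfo is older than now minus the configured
        # recheck_nodeinfo interval (in seconds), or never set at all.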
1598
1599     domains = database.cursor.fetchall()
1600
1601     logger.info("Checking %d domain(s) ...", len(domains))
1602     cnt = 0
1603     for row in domains:
1604         logger.debug("row[]='%s'", type(row))
1605         try:
1606             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1607             software = federation.determine_software(row["domain"])
1608
1609             logger.debug("Determined software='%s'", software)
1610             if software != row["software"] and software is not None:
1611                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1612                 instances.set_software(row["domain"], software)
1613
1614             instances.set_success(row["domain"])
1615         except network.exceptions as exception:
1616             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1617             instances.set_last_error(row["domain"], exception)
1618
1619         instances.set_last_nodeinfo(row["domain"])
1620         instances.update_data(row["domain"])
1621         cnt = cnt + 1
1622
1623     logger.debug("Success! - EXIT!")
1624     return 0
1625
1626 def fetch_instances_social(args: argparse.Namespace) -> int:
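    """Fetches the instance list from the instances.social API (requires
    instances_social_api_key in config.json) and crawls every wanted, not
    yet registered domain."""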
1627     logger.debug("args[]='%s' - CALLED!", type(args))
1628
1629     logger.debug("Invoking locking.acquire() ...")
1630     locking.acquire()
1631
1632     source_domain = "instances.social"
1633
1634     if config.get("instances_social_api_key") == "":
1635         logger.error("API key not set. Please set in your config.json file.")
1636         return 1
1637     elif sources.is_recent(source_domain):
1638         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1639         return 0
1640     else:
1641         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1642         sources.update(source_domain)
1643
1644     headers = {
1645         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1646     }
1647
1648     fetched = network.get_json_api(
1649         source_domain,
1650         "/api/1.0/instances/list?count=0&sort_by=name",
1651         headers,
1652         (config.get("connection_timeout"), config.get("read_timeout"))
1653     )
1654     logger.debug("fetched[]='%s'", type(fetched))
1655
1656     if "error_message" in fetched:
1657         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1658         return 2
1659     elif "exception" in fetched:
1660         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1661         return 3
1662     elif "json" not in fetched:
1663         logger.warning("fetched has no element 'json' - EXIT!")
1664         return 4
1665     elif "instances" not in fetched["json"]:
1666         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1667         return 5
1668
1669     domains = list()
1670     rows = fetched["json"]["instances"]
1671
1672     logger.info("Checking %d row(s) ...", len(rows))
1673     for row in rows:
1674         logger.debug("row[]='%s'", type(row))
1675         domain = tidyup.domain(row["name"])
1676         logger.debug("domain='%s' - AFTER!", domain)
1677
1678         if domain == "":
1679             logger.debug("domain is empty - SKIPPED!")
1680             continue
1681
1682         logger.debug("domain='%s' - BEFORE!", domain)
1683         domain = domain.encode("idna").decode("utf-8")
1684         logger.debug("domain='%s' - AFTER!", domain)
1685
1686         if not utils.is_domain_wanted(domain):
1687             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1688             continue
1689         elif domain in domains:
1690             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1691             continue
1692         elif instances.is_registered(domain):
1693             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1694             continue
1695         elif instances.is_recent(domain):
1696             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1697             continue
1698
1699         logger.info("Fetching instances from domain='%s'", domain)
1700         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1701
1702     logger.debug("Success! - EXIT!")
1703     return 0
1704
1705 def convert_idna(args: argparse.Namespace) -> int:
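    """Rewrites all stored domain, origin, blocker and blocked names that
    are not yet in punycode to their IDNA form via the models'
    translate_idnas() helpers, e.g. 'bücher.example' becomes
    'xn--bcher-kva.example'."""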
1706     logger.debug("args[]='%s' - CALLED!", type(args))
1707
1708     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1709     rows = database.cursor.fetchall()
1710
1711     logger.debug("rows[]='%s'", type(rows))
1712     instances.translate_idnas(rows, "domain")
1713
1714     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1715     rows = database.cursor.fetchall()
1716
1717     logger.debug("rows[]='%s'", type(rows))
1718     instances.translate_idnas(rows, "origin")
1719
1720     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1721     rows = database.cursor.fetchall()
1722
1723     logger.debug("rows[]='%s'", type(rows))
1724     blocks.translate_idnas(rows, "blocker")
1725
1726     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1727     rows = database.cursor.fetchall()
1728
1729     logger.debug("rows[]='%s'", type(rows))
1730     blocks.translate_idnas(rows, "blocked")
1731
1732     logger.debug("Success! - EXIT!")
1733     return 0