# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

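# Validates a single domain given on the command line and reports whether it
# is invalid, blacklisted or already registered. Returns a distinct non-zero
# status code for each of these cases and 0 when the domain is not yet known.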
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

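# Cross-checks every stored nodeinfo_url against the instance's own domain
# (including its punycode form) and reports how many URLs point elsewhere.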
def check_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

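# Fetches the public server list from the pixelfed.org API and queues every
# newly discovered, wanted domain for a full instance fetch.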
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, no need to supply network.source_headers here
    headers = dict()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

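# Queries the GraphQL endpoint at gql.api.bka.li for its domain list and
# fetches instance data for every wanted, not yet registered domain.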
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows[data]()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

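# Fetches block lists from all known Pleroma, Mastodon, Lemmy, Friendica and
# Misskey instances (or a single domain/software when given) and records each
# block with its level and reason.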
def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.debug("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("blocker='%s'", blocker)
        if blocker != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", blocker, block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

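# Scrapes fediverse.observer: reads the software list from the site's
# navigation bar (or uses args.software) and fetches the table data for each
# software to discover new instances.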
def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s'", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

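# Parses the block list published at wiki.todon.eu (silenced/limited and
# suspended servers) and records the entries as blocks by todon.eu.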
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

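# Downloads chaos.social's federation.md from raw.githubusercontent.com,
# renders the Markdown and imports the silenced and blocked instance tables.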
def fetch_cs(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

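# Reads an FBA-specific RSS feed (given via args.feed) and queues the domain
# from each item's link for an instance fetch.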
def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

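# Parses the Atom feed of the FBA bot account on ryona.agency and extracts
# linked domains from each entry for instance fetching.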
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

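# Fetches the peer list of args.domain and, unless args.single is set, walks
# all known instances whose last fetch is older than the recheck interval.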
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

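# Downloads a fixed set of CSV block lists from the oliphant/blocklists
# repository on codeberg.org and imports each row as a block, including
# reject_media and reject_reports flags.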
def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/gardenfence.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/birdsite.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or len(response.content) == 0:
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        blockdict = list()

        cnt = 0
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            cnt = cnt + 1
            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif blocks.is_instance_blocked(block["blocker"], domain, severity):
                logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if block["blocker"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

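# Imports plain-text block lists from a static set of URLs (currently
# seirdy.one's bsl.txt), one domain per line.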
def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

1185 def fetch_fedipact(args: argparse.Namespace) -> int:
1186     logger.debug("args[]='%s' - CALLED!", type(args))
1187
1188     logger.debug("Invoking locking.acquire() ...")
1189     locking.acquire()
1190
1191     source_domain = "fedipact.online"
1192     if sources.is_recent(source_domain):
1193         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1194         return 0
1195     else:
1196         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1197         sources.update(source_domain)
1198
1199     response = utils.fetch_url(
1200         f"https://{source_domain}",
1201         network.web_headers,
1202         (config.get("connection_timeout"), config.get("read_timeout"))
1203     )
1204
1205     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1206     if response.ok and response.status_code < 300 and response.text != "":
1207         logger.debug("Parsing %d bytes ...", len(response.text))
1208
1209         doc = bs4.BeautifulSoup(response.text, "html.parser")
1210         logger.debug("doc[]='%s'", type(doc))
1211
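        # The pact page lists participating instances as plain <li> elements
        # (an assumption about the page layout that the parser below relies on).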
1212         rows = doc.findAll("li")
1213         logger.info("Checking %d row(s) ...", len(rows))
1214         for row in rows:
1215             logger.debug("row[]='%s'", type(row))
1216             domain = tidyup.domain(row.contents[0])
1217
1218             logger.debug("domain='%s' - AFTER!", domain)
1219             if domain == "":
1220                 logger.debug("domain is empty - SKIPPED!")
1221                 continue
1222
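            # Convert internationalized domain names to their punycode (IDNA) form.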
1223             logger.debug("domain='%s' - BEFORE!", domain)
1224             domain = domain.encode("idna").decode("utf-8")
1225             logger.debug("domain='%s' - AFTER!", domain)
1226
1227             if not utils.is_domain_wanted(domain):
1228                 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1229                 continue
1230             elif instances.is_registered(domain):
1231                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1232                 continue
1233             elif instances.is_recent(domain):
1234                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1235                 continue
1236
1237             logger.info("Fetching domain='%s' ...", domain)
1238             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1239
1240     logger.debug("Success! - EXIT!")
1241     return 0
1242
1243 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1244     logger.debug("args[]='%s' - CALLED!", type(args))
1245
1246     logger.debug("Invoking locking.acquire() ...")
1247     locking.acquire()
1248
1249     source_domain = "instanceapp.misskey.page"
1250     if sources.is_recent(source_domain):
1251         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1252         return 0
1253     else:
1254         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1255         sources.update(source_domain)
1256
1257     raw = utils.fetch_url(
1258         f"https://{source_domain}/instances.json",
1259         network.web_headers,
1260         (config.get("connection_timeout"), config.get("read_timeout"))
1261     ).text
1262     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1263
1264     parsed = json.loads(raw)
1265     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1266
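    # Expected shape (simplified, per the checks below):
    #   {"instancesInfos": [{"url": "misskey.example", ...}, ...]}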
1267     if "instancesInfos" not in parsed:
1268         logger.warning("parsed()=%d does not contain element 'instancesInfos' - EXIT!", len(parsed))
1269         return 1
1270
1271     logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1272     for row in parsed["instancesInfos"]:
1273         logger.debug("row[%s]='%s'", type(row), row)
1274         if "url" not in row:
1275             logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1276             continue
1277         elif not utils.is_domain_wanted(row["url"]):
1278             logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1279             continue
1280         elif instances.is_registered(row["url"]):
1281             logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1282             continue
1283
1284         logger.info("Fetching row[url]='%s' ...", row["url"])
1285         federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1286
1287     logger.debug("Success! - EXIT!")
1288     return 0
1289
1290 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1291     logger.debug("args[]='%s' - CALLED!", type(args))
1292
1293     logger.debug("Invoking locking.acquire() ...")
1294     locking.acquire()
1295
1296     source_domain = "joinfediverse.wiki"
1297     if sources.is_recent(source_domain):
1298         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1299         return 0
1300     else:
1301         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1302         sources.update(source_domain)
1303
1304     raw = utils.fetch_url(
1305         f"https://{source_domain}/FediBlock",
1306         network.web_headers,
1307         (config.get("connection_timeout"), config.get("read_timeout"))
1308     ).text
1309     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1310
1311     doc = bs4.BeautifulSoup(raw, "html.parser")
1312     logger.debug("doc[]='%s'", type(doc))
1313
1314     tables = doc.findAll("table", {"class": "wikitable"})
1315
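    # Each "wikitable" should start with a header row naming its columns; only
    # the domain/instance, subdomain(s) and block reason(s) columns are scraped.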
1316     logger.info("Analyzing %d table(s) ...", len(tables))
1317     blocklist = list()
1318     for table in tables:
1319         logger.debug("table[]='%s'", type(table))
1320
1321         rows = table.findAll("tr")
1322         logger.info("Checking %d row(s) ...", len(rows))
1323         block_headers = dict()
1324         for row in rows:
1325             logger.debug("row[%s]='%s'", type(row), row)
1326
1327             headers = row.findAll("th")
1328             logger.debug("Found headers()=%d header(s)", len(headers))
1329             if len(headers) > 1:
1330                 block_headers = dict()
1331                 cnt = 0
1332                 for header in headers:
1333                     cnt = cnt + 1
1334                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1335                     text = header.contents[0]
1336
1337                     logger.debug("text[]='%s'", type(text))
1338                     if not isinstance(text, str):
1339                         logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1340                         continue
1341                     elif validators.domain(text.strip()):
1342                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1343                         continue
1344
1345                     text = tidyup.domain(text.strip())
1346                     logger.debug("text='%s' - AFTER!", text)
1347                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1348                         logger.debug("Found header: '%s'=%d", text, cnt)
1349                         block_headers[cnt] = text
1350
1351             elif len(block_headers) == 0:
1352                 logger.debug("row is not scrapable - SKIPPED!")
1353                 continue
1354             elif len(block_headers) > 0:
1355                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1356                 cnt = 0
1357                 block = dict()
1358
1359                 for element in row.find_all(["th", "td"]):
1360                     cnt = cnt + 1
1361                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1362                     if cnt in block_headers:
1363                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1364
1365                         text = element.text.strip()
1366                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1367
1368                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1369                         if key == "blocked":  # "domain"/"instance" headers were remapped to "blocked" above
1370                             block[key] = text
1371                         elif key == "block reason(s)":
1372                             block[key] = tidyup.reason(text)
1373                         elif key == "subdomain(s)":
1374                             block[key] = list()
1375                             if text != "":
1376                                 block[key] = text.split("/")
1377                         else:
1378                             logger.debug("key='%s'", key)
1379                             block[key] = text
1380
1381                 logger.debug("block()=%d ...", len(block))
1382                 if len(block) > 0:
1383                     logger.debug("Appending block()=%d ...", len(block))
1384                     blocklist.append(block)
1385
1386     logger.debug("blocklist()=%d", len(blocklist))
1387
1388     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1389     domains = database.cursor.fetchall()
1390
1391     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
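    # Expand entries that list subdomains into one record per subdomain.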
1392     blocking = list()
1393     for block in blocklist:
1394         logger.debug("block='%s'", block)
1395         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1396             origin = block["blocked"]
1397             logger.debug("origin='%s'", origin)
1398             for subdomain in block["subdomain(s)"]:
1399                 entry = dict(block, blocked=subdomain + "." + origin)  # copy per subdomain; re-appending the shared dict would keep only the last value
1400                 logger.debug("entry[blocked]='%s'", entry["blocked"])
1401                 blocking.append(entry)
1402         else:
1403             blocking.append(block)
1404
1405     logger.debug("blocking()=%d", len(blocking))
1406     for block in blocking:
1407         logger.debug("block[]='%s'", type(block))
1408         if "blocked" not in block:
1409             raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1410
1411         block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1412         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1413
1414         if block["blocked"] == "":
1415             logger.debug("block[blocked] is empty - SKIPPED!")
1416             continue
1417         elif not utils.is_domain_wanted(block["blocked"]):
1418             logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1419             continue
1420         elif instances.is_recent(block["blocked"]):
1421             logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1422             continue
1423
1424         logger.info("Processing blocked='%s' ...", block["blocked"])
1425         processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1426
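    # Replay the collected blocks for every climatejustice.* blocker fetched above.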
1427     for blocker in domains:
1428         blocker = blocker[0]
1429         logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1430         blockdict = list()
1431
1432         for block in blocking:
1433             logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1434             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1435
1436             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1437             if block["blocked"] == "":
1438                 logger.debug("block[blocked] is empty - SKIPPED!")
1439                 continue
1440             elif not utils.is_domain_wanted(block["blocked"]):
1441                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1442                 continue
1443
1444             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1445             if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1446                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1447                 blockdict.append({
1448                     "blocked": block["blocked"],
1449                     "reason" : block["reason"],
1450                 })
1451
1452         if instances.has_pending(blocker):
1453             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1454             instances.update_data(blocker)
1455
1456         logger.debug("Invoking commit() ...")
1457         database.connection.commit()
1458
1459         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1460         if config.get("bot_enabled") and len(blockdict) > 0:
1461             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1462             network.send_bot_post(blocker, blockdict)
1463
1464     logger.debug("Success! - EXIT!")
1465     return 0
1466
1467 def recheck_obfuscation(args: argparse.Namespace) -> int:
1468     logger.debug("args[]='%s' - CALLED!", type(args))
1469
1470     logger.debug("Invoking locking.acquire() ...")
1471     locking.acquire()
1472
1473     if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1474         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1475     elif isinstance(args.software, str) and args.software != "":
1476         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1477     else:
1478         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1479
1480     rows = database.cursor.fetchall()
1481     logger.info("Checking %d domains ...", len(rows))
1482     for row in rows:
1483         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1484         if (args.force is None or not args.force) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1485             logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1486             continue
1487
1488         blocking = list()
1489         if row["software"] == "pleroma":
1490             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1491             blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1492         elif row["software"] == "mastodon":
1493             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1494             blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1495         elif row["software"] == "lemmy":
1496             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1497             blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1498         elif row["software"] == "friendica":
1499             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1500             blocking = friendica.fetch_blocks(row["domain"])
1501         elif row["software"] == "misskey":
1502             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1503             blocking = misskey.fetch_blocks(row["domain"])
1504         else:
1505             logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1506
1507         logger.debug("row[domain]='%s'", row["domain"])
1508         # chaos.social requires special care ...
1509         if row["domain"] != "chaos.social":
1510             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1511             instances.set_total_blocks(row["domain"], blocking)
1512
1513         obfuscated = 0
1514         blockdict = list()
1515
1516         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1517         for block in blocking:
1518             logger.debug("block[blocked]='%s'", block["blocked"])
1519             blocked = None
1520
1521             if block["blocked"] == "":
1522                 logger.debug("block[blocked] is empty - SKIPPED!")
1523                 continue
1524             elif block["blocked"].endswith(".arpa"):
1525                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1526                 continue
1527             elif block["blocked"].endswith(".tld"):
1528                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1529                 continue
1530             elif block["blocked"].endswith(".onion"):
1531                 logger.debug("blocked='%s' is a Tor onion domain name - SKIPPED!", block["blocked"])
1532                 continue
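            # Wildcard characters mark an obfuscated entry; try to recover the
            # real domain from the accompanying hash, when one is present.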
1533             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1534                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1535                 obfuscated = obfuscated + 1
1536                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1537             elif not utils.is_domain_wanted(block["blocked"]):
1538                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1539                 continue
1540             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1541                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1542                 continue
1543
1544             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1545             if blocked is not None and blocked != block["blocked"]:
1546                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1547                 obfuscated = obfuscated - 1
1548                 if blocks.is_instance_blocked(row["domain"], blocked):
1549                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1550                     continue
1551
1552                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1553
1554                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1555                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1556                     logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1557                     blockdict.append({
1558                         "blocked": blocked,
1559                         "reason" : block["reason"],
1560                     })
1561
1562         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1563         if obfuscated == 0 and len(blocking) > 0:
1564             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1565             instances.set_has_obfuscation(row["domain"], False)
1566
1567         if instances.has_pending(row["domain"]):
1568             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1569             instances.update_data(row["domain"])
1570
1571         logger.debug("Invoking commit() ...")
1572         database.connection.commit()
1573
1574         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1575         if config.get("bot_enabled") and len(blockdict) > 0:
1576             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1577             network.send_bot_post(row["domain"], blockdict)
1578
1579     logger.debug("Success! - EXIT!")
1580     return 0
1581
1582 def fetch_fedilist(args: argparse.Namespace) -> int:
1583     logger.debug("args[]='%s' - CALLED!", type(args))
1584
1585     logger.debug("Invoking locking.acquire() ...")
1586     locking.acquire()
1587
1588     source_domain = "demo.fedilist.com"
1589     if sources.is_recent(source_domain):
1590         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1591         return 0
1592     else:
1593         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1594         sources.update(source_domain)
1595
1596     url = f"http://{source_domain}/instance/csv?onion=not"
1597     if args.software is not None and args.software != "":
1598         logger.debug("args.software='%s'", args.software)
1599         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1600
1601     logger.info("Fetching url='%s' ...", url)
1602     response = reqto.get(
1603         url,
1604         headers=network.web_headers,
1605         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1606         allow_redirects=False
1607     )
1608
1609     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1610     if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1611         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1612         return 1
1613
1614     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
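    # The CSV export is expected to provide one row per instance with a
    # "hostname" column.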
1615
1616     logger.debug("reader[]='%s'", type(reader))
1617     for row in reader:
1618         logger.debug("row[]='%s'", type(row))
1619         if "hostname" not in row:
1620             logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1621             continue
1622
1623         logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1624         domain = tidyup.domain(row["hostname"])
1625         logger.debug("domain='%s' - AFTER!", domain)
1626
1627         if domain == "":
1628             logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1629             continue
1630
1631         logger.debug("domain='%s' - BEFORE!", domain)
1632         domain = domain.encode("idna").decode("utf-8")
1633         logger.debug("domain='%s' - AFTER!", domain)
1634
1635         if not utils.is_domain_wanted(domain):
1636             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1637             continue
1638         elif (args.force is None or not args.force) and instances.is_registered(domain):
1639             logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s' - SKIPPED!", domain, type(args.force))
1640             continue
1641         elif instances.is_recent(domain):
1642             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1643             continue
1644
1645         logger.info("Fetching instances from domain='%s' ...", domain)
1646         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1647
1648     logger.debug("Success! - EXIT!")
1649     return 0
1650
1651 def update_nodeinfo(args: argparse.Namespace) -> int:
1652     logger.debug("args[]='%s' - CALLED!", type(args))
1653
1654     logger.debug("Invoking locking.acquire() ...")
1655     locking.acquire()
1656
1657     if args.domain is not None and args.domain != "":
1658         logger.debug("Fetching args.domain='%s'", args.domain)
1659         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1660     elif args.software is not None and args.software != "":
1661         logger.info("Fetching domains for args.software='%s'", args.software)
1662         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1663     else:
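        # No filter given: recheck every instance whose nodeinfo is missing or
        # older than the configured "recheck_nodeinfo" interval.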
1664         logger.info("Fetching domains with outdated nodeinfo ...")
1665         database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1666
1667     domains = database.cursor.fetchall()
1668
1669     logger.info("Checking %d domain(s) ...", len(domains))
1670     cnt = 0
1671     for row in domains:
1672         logger.debug("row[]='%s'", type(row))
1673         try:
1674             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1675             software = federation.determine_software(row["domain"])
1676
1677             logger.debug("Determined software='%s'", software)
1678             if (software != row["software"] and software is not None) or args.force is True:
1679                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1680                 instances.set_software(row["domain"], software)
1681
1682             instances.set_success(row["domain"])
1683         except network.exceptions as exception:
1684             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1685             instances.set_last_error(row["domain"], exception)
1686
1687         instances.set_last_nodeinfo(row["domain"])
1688         instances.update_data(row["domain"])
1689         cnt = cnt + 1
1690
1691     logger.debug("Success! - EXIT!")
1692     return 0
1693
1694 def fetch_instances_social(args: argparse.Namespace) -> int:
1695     logger.debug("args[]='%s' - CALLED!", type(args))
1696
1697     logger.debug("Invoking locking.acquire() ...")
1698     locking.acquire()
1699
1700     source_domain = "instances.social"
1701
1702     if config.get("instances_social_api_key") == "":
1703         logger.error("API key not set. Please set in your config.json file.")
1704         return 1
1705     elif sources.is_recent(source_domain):
1706         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1707         return 0
1708     else:
1709         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1710         sources.update(source_domain)
1711
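    # instances.social requires a bearer token, taken from the
    # "instances_social_api_key" configuration key checked above.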
1712     headers = {
1713         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1714     }
1715
1716     fetched = network.get_json_api(
1717         source_domain,
1718         "/api/1.0/instances/list?count=0&sort_by=name",
1719         headers,
1720         (config.get("connection_timeout"), config.get("read_timeout"))
1721     )
1722     logger.debug("fetched[]='%s'", type(fetched))
1723
1724     if "error_message" in fetched:
1725         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1726         return 2
1727     elif "exception" in fetched:
1728         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1729         return 3
1730     elif "json" not in fetched:
1731         logger.warning("fetched has no element 'json' - EXIT!")
1732         return 4
1733     elif "instances" not in fetched["json"]:
1734         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1735         return 5
1736
1737     domains = list()
1738     rows = fetched["json"]["instances"]
1739
1740     logger.info("Checking %d row(s) ...", len(rows))
1741     for row in rows:
1742         logger.debug("row[]='%s'", type(row))
1743         domain = tidyup.domain(row["name"])
1744         logger.debug("domain='%s' - AFTER!", domain)
1745
1746         if domain == "":
1747             logger.debug("domain is empty - SKIPPED!")
1748             continue
1749
1750         logger.debug("domain='%s' - BEFORE!", domain)
1751         domain = domain.encode("idna").decode("utf-8")
1752         logger.debug("domain='%s' - AFTER!", domain)
1753
1754         if not utils.is_domain_wanted(domain):
1755             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1756             continue
1757         elif domain in domains:
1758             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1759             continue
1760         elif instances.is_registered(domain):
1761             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1762             continue
1763         elif instances.is_recent(domain):
1764             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1765             continue
1766
1767         logger.info("Fetching instances from domain='%s'", domain)
1768         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1769
1770     logger.debug("Success! - EXIT!")
1771     return 0
1772
1773 def convert_idna(args: argparse.Namespace) -> int:
1774     logger.debug("args[]='%s' - CALLED!", type(args))
1775
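    # Convert all rows still stored in Unicode form (no "xn--" infix) into
    # their punycode/IDNA representation, column by column, in both tables.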
1776     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1777     rows = database.cursor.fetchall()
1778
1779     logger.debug("rows[]='%s'", type(rows))
1780     instances.translate_idnas(rows, "domain")
1781
1782     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1783     rows = database.cursor.fetchall()
1784
1785     logger.debug("rows[]='%s'", type(rows))
1786     instances.translate_idnas(rows, "origin")
1787
1788     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1789     rows = database.cursor.fetchall()
1790
1791     logger.debug("rows[]='%s'", type(rows))
1792     blocks.translate_idnas(rows, "blocker")
1793
1794     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1795     rows = database.cursor.fetchall()
1796
1797     logger.debug("rows[]='%s'", type(rows))
1798     blocks.translate_idnas(rows, "blocked")
1799
1800     logger.debug("Success! - EXIT!")
1801     return 0