# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
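    """Check whether args.domain is valid, not blacklisted and not yet
    registered. Returns 0 if the domain is still unknown, otherwise a
    non-zero status code."""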
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
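    """Report instances whose stored nodeinfo_url contains neither their
    domain name nor its punycode form. Relative nodeinfo URLs always match
    and are skipped."""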
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d mismatching row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
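    """Fetch the public server list from the pixelfed.org API and register
    any new, wanted domains."""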
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token is used by default, so there is no need to add
    # network.source_headers manually here.
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch pixelfed API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
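    """Fetch a domain list from the gql.api.bka.li GraphQL API and register
    any new, wanted domains."""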
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
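    """Fetch block lists from registered instances, optionally limited to a
    single domain (args.domain) or software (args.software), and record the
    blocks found."""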
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.debug("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("blocker='%s'", blocker)
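        # chaos.social's block list is handled by fetch_cs(), so its total
        # block count is not overwritten here.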
        if blocker != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
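    """Scrape the per-software server tables from fediverse.observer and
    register newly found domains."""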
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)

        # Resolve software aliases once per table, not once per item
        software = software_helper.alias(software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s' - AFTER!", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
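    """Scrape the silenced/limited and suspended server lists from
    wiki.todon.eu and record them as blocks by todon.eu."""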
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
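    """Fetch chaos.social's federation.md block list from
    raw.githubusercontent.com, parse the Markdown tables and record the
    blocks."""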
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
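    """Parse an FBA-specific RSS feed given via args.feed and register newly
    found domains."""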
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
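    """Parse the FBA bot's ATOM feed on ryona.agency and register newly
    found domains."""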
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
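    """Fetch peers from args.domain first, then re-crawl known instances
    whose last instance fetch is older than the configured interval. With
    args.single set, only args.domain is fetched."""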
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
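    """Download the CSV block lists from the oliphant/blocklists repository
    on codeberg.org and record the blocks per contributing instance."""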
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/gardenfence.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/birdsite.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        blockdict = list()

        cnt = 0
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            cnt = cnt + 1
            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif blocks.is_instance_blocked(block["blocker"], domain, severity):
                logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if block["blocker"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
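    """Process static plain-text block lists (currently seirdy.one's
    bsl.txt) and register the listed domains."""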
1137     logger.debug("args[]='%s' - CALLED!", type(args))
1138
1139     logger.debug("Invoking locking.acquire() ...")
1140     locking.acquire()
1141
1142     # Static URLs
1143     urls = ({
1144         "blocker": "seirdy.one",
1145         "url"    : "https://seirdy.one/pb/bsl.txt",
1146     },)
1147
1148     logger.info("Checking %d text file(s) ...", len(urls))
1149     for row in urls:
1150         logger.debug("Fetching row[url]='%s' ...", row["url"])
1151         response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1152
1153         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1154         if response.ok and response.status_code < 300 and response.text != "":
1155             logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1156             domains = response.text.split("\n")
1157
1158             logger.info("Processing %d domains ...", len(domains))
1159             for domain in domains:
1160                 logger.debug("domain='%s' - BEFORE!", domain)
1161                 domain = tidyup.domain(domain)
1162
1163                 logger.debug("domain='%s' - AFTER!", domain)
1164                 if domain == "":
1165                     logger.debug("domain is empty - SKIPPED!")
1166                     continue
1167                 elif not utils.is_domain_wanted(domain):
1168                     logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1169                     continue
1170                 elif instances.is_recent(domain):
1171                     logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1172                     continue
1173
1174                 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1175                 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1176
1177                 logger.debug("processed='%s'", processed)
1178                 if not processed:
1179                     logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1180                     continue
1181
1182     logger.debug("Success! - EXIT!")
1183     return 0
1184
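# Scrapes the FediPact participant list (plain <li> elements) from
# fedipact.online and queues every wanted, unknown instance for crawling.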
1185 def fetch_fedipact(args: argparse.Namespace) -> int:
1186     logger.debug("args[]='%s' - CALLED!", type(args))
1187
1188     logger.debug("Invoking locking.acquire() ...")
1189     locking.acquire()
1190
1191     source_domain = "fedipact.online"
1192     if sources.is_recent(source_domain):
1193         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1194         return 0
1195     else:
1196         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1197         sources.update(source_domain)
1198
1199     logger.info("Fetching / from source_domain='%s' ...", source_domain)
1200     response = utils.fetch_url(
1201         f"https://{source_domain}",
1202         network.web_headers,
1203         (config.get("connection_timeout"), config.get("read_timeout"))
1204     )
1205
1206     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1207     if response.ok and response.status_code < 300 and response.text != "":
1208         logger.debug("Parsing %d Bytes ...", len(response.text))
1209
1210         doc = bs4.BeautifulSoup(response.text, "html.parser")
1211         logger.debug("doc[]='%s'", type(doc))
1212
1213         rows = doc.findAll("li")
1214         logger.info("Checking %d row(s) ...", len(rows))
1215         for row in rows:
1216             logger.debug("row[]='%s'", type(row))
1217             domain = tidyup.domain(row.contents[0])
1218
1219             logger.debug("domain='%s' - AFTER!", domain)
1220             if domain == "":
1221                 logger.debug("domain is empty - SKIPPED!")
1222                 continue
1223
1224             logger.debug("domain='%s' - BEFORE!", domain)
1225             domain = domain.encode("idna").decode("utf-8")
1226             logger.debug("domain='%s' - AFTER!", domain)
1227
1228             if not utils.is_domain_wanted(domain):
1229                 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1230                 continue
1231             elif instances.is_registered(domain):
1232                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1233                 continue
1234             elif instances.is_recent(domain):
1235                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1236                 continue
1237
1238             logger.info("Fetching domain='%s' ...", domain)
1239             federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1240
1241     logger.debug("Success! - EXIT!")
1242     return 0
1243
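# Fetches instances.json from instanceapp.misskey.page and queues every
# wanted, not yet registered Misskey instance for crawling.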
1244 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1245     logger.debug("args[]='%s' - CALLED!", type(args))
1246
1247     logger.debug("Invoking locking.acquire() ...")
1248     locking.acquire()
1249
1250     source_domain = "instanceapp.misskey.page"
1251     if sources.is_recent(source_domain):
1252         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1253         return 0
1254     else:
1255         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1256         sources.update(source_domain)
1257
1258     logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1259     raw = utils.fetch_url(
1260         f"https://{source_domain}/instances.json",
1261         network.web_headers,
1262         (config.get("connection_timeout"), config.get("read_timeout"))
1263     ).text
1264     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1265
1266     parsed = json.loads(raw)
1267     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1268
1269     if "instancesInfos" not in parsed:
1270         logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1271         return 1
1272
1273     logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1274     for row in parsed["instancesInfos"]:
1275         logger.debug("row[%s]='%s'", type(row), row)
1276         if "url" not in row:
1277             logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1278             continue
1279         elif not utils.is_domain_wanted(row["url"]):
1280             logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1281             continue
1282         elif instances.is_registered(row["url"]):
1283             logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1284             continue
1285
1286         logger.info("Fetching row[url]='%s' ...", row["url"])
1287         federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1288
1289     logger.debug("Success! - EXIT!")
1290     return 0
1291
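# Scrapes the "FediBlock" wiki page from joinfediverse.wiki: its "wikitable"
# tables are parsed into block records (blocked domain, subdomain(s), block
# reason(s)) which are then applied as "reject" blocks.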
1292 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1293     logger.debug("args[]='%s' - CALLED!", type(args))
1294
1295     logger.debug("Invoking locking.acquire() ...")
1296     locking.acquire()
1297
1298     source_domain = "joinfediverse.wiki"
1299     if sources.is_recent(source_domain):
1300         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1301         return 0
1302     else:
1303         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1304         sources.update(source_domain)
1305
1306     logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
1307     raw = utils.fetch_url(
1308         f"https://{source_domain}/FediBlock",
1309         network.web_headers,
1310         (config.get("connection_timeout"), config.get("read_timeout"))
1311     ).text
1312     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1313
1314     doc = bs4.BeautifulSoup(raw, "html.parser")
1315     logger.debug("doc[]='%s'", type(doc))
1316
1317     tables = doc.findAll("table", {"class": "wikitable"})
1318
1319     logger.info("Analyzing %d table(s) ...", len(tables))
1320     blocklist = list()
1321     for table in tables:
1322         logger.debug("table[]='%s'", type(table))
1323
1324         rows = table.findAll("tr")
1325         logger.info("Checking %d row(s) ...", len(rows))
1326         block_headers = dict()
1327         for row in rows:
1328             logger.debug("row[%s]='%s'", type(row), row)
1329
1330             headers = row.findAll("th")
1331             logger.debug("Found headers()=%d header(s)", len(headers))
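            # A row with more than one <th> cell (re)defines the mapping from
            # column position to header name; the data rows that follow are
            # decoded through this block_headers mapping.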
1332             if len(headers) > 1:
1333                 block_headers = dict()
1334                 cnt = 0
1335                 for header in headers:
1336                     cnt = cnt + 1
1337                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1338                     text = header.contents[0]
1339
1340                     logger.debug("text[]='%s'", type(text))
1341                     if not isinstance(text, str):
1342                         logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1343                         continue
1344                     elif validators.domain(text.strip()):
1345                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1346                         continue
1347
1348                     text = tidyup.domain(text.strip())
1349                     logger.debug("text='%s' - AFTER!", text)
1350                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1351                         logger.debug("Found header: '%s'=%d", text, cnt)
1352                         block_headers[cnt] = text
1353
1354             elif len(block_headers) == 0:
1355                 logger.debug("row is not scrapable - SKIPPED!")
1356                 continue
1357             elif len(block_headers) > 0:
1358                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1359                 cnt = 0
1360                 block = dict()
1361
1362                 for element in row.find_all(["th", "td"]):
1363                     cnt = cnt + 1
1364                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1365                     if cnt in block_headers:
1366                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1367
1368                         text = element.text.strip()
1369                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1370
1371                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        # key has already been remapped above: "domain" and "instance"
                        # became "blocked", the reason column keeps its header name.
1372                         if key == "blocked":
1373                             block[key] = text
1374                         elif key == "block reason(s)":
1375                             block[key] = tidyup.reason(text)
1376                         elif key == "subdomain(s)":
1377                             block[key] = list()
1378                             if text != "":
1379                                 block[key] = text.split("/")
1380                         else:
1381                             logger.debug("key='%s'", key)
1382                             block[key] = text
1383
1384                 logger.debug("block()=%d ...", len(block))
1385                 if len(block) > 0:
1386                     logger.debug("Appending block()=%d ...", len(block))
1387                     blocklist.append(block)
1388
1389     logger.debug("blocklist()=%d", len(blocklist))
1390
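    # The scraped wiki list is applied to all locally known climatejustice.*
    # instances as their blocker.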
1391     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1392     domains = database.cursor.fetchall()
1393
1394     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1395     blocking = list()
1396     for block in blocklist:
1397         logger.debug("block='%s'", block)
1398         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1399             origin = block["blocked"]
1400             logger.debug("origin='%s'", origin)
1401             for subdomain in block["subdomain(s)"]:
                # Append a copy per subdomain - appending the same dict instance
                # would leave every list entry pointing at the last subdomain.
1402                 entry = dict(block, blocked=subdomain + "." + origin)
1403                 logger.debug("entry[blocked]='%s'", entry["blocked"])
1404                 blocking.append(entry)
1405         else:
1406             blocking.append(block)
1407
1408     logger.debug("blocking()=%d", len(blocking))
1409     for block in blocking:
1410         logger.debug("block[]='%s'", type(block))
1411         if "blocked" not in block:
1412             raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1413
1414         block["blocked"] = tidyup.domain(block["blocked"])
1415         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1416
1417         if block["blocked"] == "":
1418             logger.debug("block[blocked] is empty - SKIPPED!")
1419             continue

        # Encode to punycode only after the empty check - the "idna" codec
        # raises UnicodeError on empty input.
        block["blocked"] = block["blocked"].encode("idna").decode("utf-8")

1420         if not utils.is_domain_wanted(block["blocked"]):
1421             logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1422             continue
1423         elif instances.is_recent(block["blocked"]):
1424             logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1425             continue
1426
1427         logger.info("Processing blocked='%s' ...", block["blocked"])
1428         processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1429
1431     for blocker in domains:
1432         blocker = blocker[0]
1433         logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        # Reset per blocker so the bot POST below only carries this blocker's
        # newly added blocks.
        blockdict = list()
1434
1435         for block in blocking:
1436             logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1437             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1438
1439             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1440             if block["blocked"] == "":
1441                 logger.debug("block[blocked] is empty - SKIPPED!")
1442                 continue
1443             elif not utils.is_domain_wanted(block["blocked"]):
1444                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1445                 continue
1446
1447             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1448             if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1449                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1450                 blockdict.append({
1451                     "blocked": block["blocked"],
1452                     "reason" : block["reason"],
1453                 })
1454
1455         if instances.has_pending(blocker):
1456             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1457             instances.update_data(blocker)
1458
1459         logger.debug("Invoking commit() ...")
1460         database.connection.commit()
1461
1462         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1463         if config.get("bot_enabled") and len(blockdict) > 0:
1464             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1465             network.send_bot_post(blocker, blockdict)
1466
1467     logger.debug("Success! - EXIT!")
1468     return 0
1469
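# Re-fetches the block lists of instances flagged with has_obfuscation and
# tries to resolve obfuscated entries (wildcards like "*" or "?") back to real
# domain names; once a list is fully deobfuscated the flag is cleared again.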
1470 def recheck_obfuscation(args: argparse.Namespace) -> int:
1471     logger.debug("args[]='%s' - CALLED!", type(args))
1472
1473     logger.debug("Invoking locking.acquire() ...")
1474     locking.acquire()
1475
1476     if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1477         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1478     elif isinstance(args.software, str) and args.software != "":
1479         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1480     else:
1481         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1482
1483     rows = database.cursor.fetchall()
1484     logger.info("Checking %d domains ...", len(rows))
1485     for row in rows:
1486         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1487         if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1488             logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1489             continue
1490
1491         blocking = list()
1492         if row["software"] == "pleroma":
1493             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1494             blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1495         elif row["software"] == "mastodon":
1496             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1497             blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1498         elif row["software"] == "lemmy":
1499             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1500             blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1501         elif row["software"] == "friendica":
1502             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1503             blocking = friendica.fetch_blocks(row["domain"])
1504         elif row["software"] == "misskey":
1505             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1506             blocking = misskey.fetch_blocks(row["domain"])
1507         else:
1508             logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1509
1510         logger.debug("row[domain]='%s'", row["domain"])
1511         # chaos.social requires special care ...
1512         if row["domain"] != "chaos.social":
1513             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1514             instances.set_total_blocks(row["domain"], blocking)
1515
1516         obfuscated = 0
1517         blockdict = list()
1518
1519         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1520         for block in blocking:
1521             logger.debug("block[blocked]='%s'", block["blocked"])
1522             blocked = None
1523
1524             if block["blocked"] == "":
1525                 logger.debug("block[blocked] is empty - SKIPPED!")
1526                 continue
1527             elif block["blocked"].endswith(".arpa"):
1528                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1529                 continue
1530             elif block["blocked"].endswith(".tld"):
1531                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1532                 continue
1533             elif block["blocked"].endswith(".onion"):
1534                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1535                 continue
1536             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1537                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1538                 obfuscated = obfuscated + 1
1539                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1540             elif not utils.is_domain_wanted(block["blocked"]):
1541                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1542                 continue
1543             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1544                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1545                 continue
1546
1547             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
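            # A successful deobfuscation yields a different, real domain name;
            # such an entry no longer counts as obfuscated.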
1548             if blocked is not None and blocked != block["blocked"]:
1549                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1550                 obfuscated = obfuscated - 1
1551                 if blocks.is_instance_blocked(row["domain"], blocked):
1552                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1553                     continue
1554
1555                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1556
1557                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1558                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1559                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
1560                     blockdict.append({
1561                         "blocked": blocked,
1562                         "reason" : block["reason"],
1563                     })
1564
1565         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1566         if obfuscated == 0 and len(blocking) > 0:
1567             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1568             instances.set_has_obfuscation(row["domain"], False)
1569
1570         if instances.has_pending(row["domain"]):
1571             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1572             instances.update_data(row["domain"])
1573
1574         logger.debug("Invoking commit() ...")
1575         database.connection.commit()
1576
1577         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1578         if config.get("bot_enabled") and len(blockdict) > 0:
1579             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1580             network.send_bot_post(row["domain"], blockdict)
1581
1582     logger.debug("Success! - EXIT!")
1583     return 0
1584
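# Downloads the instance list from demo.fedilist.com as CSV, optionally
# filtered by software, and queues every wanted, not recently crawled domain.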
1585 def fetch_fedilist(args: argparse.Namespace) -> int:
1586     logger.debug("args[]='%s' - CALLED!", type(args))
1587
1588     logger.debug("Invoking locking.acquire() ...")
1589     locking.acquire()
1590
1591     source_domain = "demo.fedilist.com"
1592     if sources.is_recent(source_domain):
1593         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1594         return 0
1595     else:
1596         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1597         sources.update(source_domain)
1598
1599     url = f"http://{source_domain}/instance/csv?onion=not"
1600     if args.software is not None and args.software != "":
1601         logger.debug("args.software='%s'", args.software)
1602         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1603
1604     logger.info("Fetching url='%s' ...", url)
1605     response = reqto.get(
1606         url,
1607         headers=network.web_headers,
1608         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1609         allow_redirects=False
1610     )
1611
1612     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1613     if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1614         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1615         return 1
1616
1617     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1618
1619     logger.debug("reader[]='%s'", type(reader))
1620     if reader is None:
1621         logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1622         return 2
1623
1624     rows = list(reader)
1625
1626     logger.info("Checking %d rows ...", len(rows))
1627     for row in rows:
1628         logger.debug("row[]='%s'", type(row))
1629         if "hostname" not in row:
1630             logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1631             continue
1632
1633         logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1634         domain = tidyup.domain(row["hostname"])
1635         logger.debug("domain='%s' - AFTER!", domain)
1636
1637         if domain == "":
1638             logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1639             continue
1640
1641         logger.debug("domain='%s' - BEFORE!", domain)
1642         domain = domain.encode("idna").decode("utf-8")
1643         logger.debug("domain='%s' - AFTER!", domain)
1644
1645         if not utils.is_domain_wanted(domain):
1646             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1647             continue
1648         elif (args.force is None or not args.force) and instances.is_registered(domain):
1649             logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1650             continue
1651         elif instances.is_recent(domain):
1652             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1653             continue
1654
1655         logger.info("Fetching instances from domain='%s' ...", domain)
1656         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1657
1658     logger.debug("Success! - EXIT!")
1659     return 0
1660
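# Re-runs software detection for a single domain, for all domains of a given
# software or for all instances whose nodeinfo data is older than the
# configured "recheck_nodeinfo" interval.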
1661 def update_nodeinfo(args: argparse.Namespace) -> int:
1662     logger.debug("args[]='%s' - CALLED!", type(args))
1663
1664     logger.debug("Invoking locking.acquire() ...")
1665     locking.acquire()
1666
1667     if args.domain is not None and args.domain != "":
1668         logger.debug("Fetching args.domain='%s'", args.domain)
1669         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1670     elif args.software is not None and args.software != "":
1671         logger.info("Fetching domains for args.software='%s'", args.software)
1672         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1673     else:
1674         logger.info("Fetching domains for recently updated ...")
1675         database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1676
1677     domains = database.cursor.fetchall()
1678
1679     logger.info("Checking %d domain(s) ...", len(domains))
1680     cnt = 0
1681     for row in domains:
1682         logger.debug("row[]='%s'", type(row))
1683         try:
1684             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1685             software = federation.determine_software(row["domain"])
1686
1687             logger.debug("Determined software='%s'", software)
1688             if (software != row["software"] and software is not None) or args.force is True:
1689                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1690                 instances.set_software(row["domain"], software)
1691
1692             instances.set_success(row["domain"])
1693         except network.exceptions as exception:
1694             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1695             instances.set_last_error(row["domain"], exception)
1696
1697         instances.set_last_nodeinfo(row["domain"])
1698         instances.update_data(row["domain"])
1699         cnt = cnt + 1
1700
1701     logger.debug("Success! - EXIT!")
1702     return 0
1703
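# Queries the instances.social list API - this requires
# "instances_social_api_key" to be set in config.json - and queues every
# returned instance name for crawling.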
1704 def fetch_instances_social(args: argparse.Namespace) -> int:
1705     logger.debug("args[]='%s' - CALLED!", type(args))
1706
1707     logger.debug("Invoking locking.acquire() ...")
1708     locking.acquire()
1709
1710     source_domain = "instances.social"
1711
1712     if config.get("instances_social_api_key") == "":
1713         logger.error("API key not set. Please set in your config.json file.")
1714         return 1
1715     elif sources.is_recent(source_domain):
1716         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1717         return 0
1718     else:
1719         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1720         sources.update(source_domain)
1721
1722     headers = {
1723         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1724     }
1725
1726     fetched = network.get_json_api(
1727         source_domain,
1728         "/api/1.0/instances/list?count=0&sort_by=name",
1729         headers,
1730         (config.get("connection_timeout"), config.get("read_timeout"))
1731     )
1732     logger.debug("fetched[]='%s'", type(fetched))
1733
1734     if "error_message" in fetched:
1735         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1736         return 2
1737     elif "exception" in fetched:
1738         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1739         return 3
1740     elif "json" not in fetched:
1741         logger.warning("fetched has no element 'json' - EXIT!")
1742         return 4
1743     elif "instances" not in fetched["json"]:
1744         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1745         return 5
1746
1747     domains = list()
1748     rows = fetched["json"]["instances"]
1749
1750     logger.info("Checking %d row(s) ...", len(rows))
1751     for row in rows:
1752         logger.debug("row[]='%s'", type(row))
1753         domain = tidyup.domain(row["name"])
1754         logger.debug("domain='%s' - AFTER!", domain)
1755
1756         if domain == "":
1757             logger.debug("domain is empty - SKIPPED!")
1758             continue
1759
1760         logger.debug("domain='%s' - BEFORE!", domain)
1761         domain = domain.encode("idna").decode("utf-8")
1762         logger.debug("domain='%s' - AFTER!", domain)
1763
1764         if not utils.is_domain_wanted(domain):
1765             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1766             continue
1767         elif domain in domains:
1768             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1769             continue
1770         elif instances.is_registered(domain):
1771             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1772             continue
1773         elif instances.is_recent(domain):
1774             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1775             continue
1776
1777         logger.info("Fetching instances from domain='%s'", domain)
1778         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1779
1780     logger.debug("Success! - EXIT!")
1781     return 0
1782
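# One-shot maintenance command: translates all non-punycode domain columns in
# the instances and blocks tables into their IDNA (punycode) representation.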
1783 def convert_idna(args: argparse.Namespace) -> int:
1784     logger.debug("args[]='%s' - CALLED!", type(args))
1785
1786     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1787     rows = database.cursor.fetchall()
1788
1789     logger.debug("rows[]='%s'", type(rows))
1790     instances.translate_idnas(rows, "domain")
1791
1792     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1793     rows = database.cursor.fetchall()
1794
1795     logger.debug("rows[]='%s'", type(rows))
1796     instances.translate_idnas(rows, "origin")
1797
1798     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1799     rows = database.cursor.fetchall()
1800
1801     logger.debug("rows[]='%s'", type(rows))
1802     blocks.translate_idnas(rows, "blocker")
1803
1804     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1805     rows = database.cursor.fetchall()
1806
1807     logger.debug("rows[]='%s'", type(rows))
1808     blocks.translate_idnas(rows, "blocked")
1809
1810     logger.debug("Success! - EXIT!")
1811     return 0