# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import argparse
import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
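    """Validates a single domain given via --domain: returns 100 if it is not
    a valid domain name, 101 if it is blacklisted, 102 if it is already
    registered, and 0 if it is still unknown."""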
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
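    """Cross-checks every stored nodeinfo_url against its instance domain
    (also in punycode form) and logs rows where neither variant occurs in the
    URL; relative URLs always match. Always returns 0."""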
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
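    """Fetches the public server list from the pixelfed.org API and fetches
    instance data for every new, wanted domain found there."""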
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token is sent by default; there is no need to add network.source_headers here yourself
    headers = dict()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch Pixelfed API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
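    """Queries the gql.api.bka.li GraphQL API for a domain list and fetches
    instance data for every new, wanted domain."""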
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows[data]()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch GraphQL,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
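    """Fetches and records block lists from all supported blockers (Pleroma,
    Mastodon, Lemmy, Friendica, Misskey), or from a single --domain or
    --software selection, deobfuscating censored entries where possible."""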
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("blocker='%s'", blocker)
        if blocker != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
            instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
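    """Fetches per-software instance tables from fediverse.observer (or only
    the table for args.software, if given) and fetches instance data for every
    new, wanted domain found there."""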
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain='%s' - AFTER!", domain)

            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
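    """Fetches the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks by todon.eu."""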
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
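    """Fetches chaos.social's federation.md from raw.githubusercontent.com,
    renders the Markdown and records the silenced/blocked tables as blocks by
    chaos.social."""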
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
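    """Parses the FBA-specific RSS feed given via --feed and fetches instance
    data for every new, wanted domain found in the item links."""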
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)

    if sources.is_recent(components.netloc):
        logger.info("API from components.netloc='%s' has recently been accessed - EXIT!", components.netloc)
        return 0
    else:
        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
        sources.update(components.netloc)

    logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
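    """Parses the FBA bot's ATOM feed on ryona.agency and fetches instance
    data for every new, wanted domain linked from the feed entries."""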
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    feed = f"https://{source_domain}/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
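    """Fetches instance data for args.domain first, then, unless --single is
    given, re-crawls all known instances whose last fetch is older than the
    configured 'recheck_instance' interval."""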
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
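    """Downloads the CSV block lists from the oliphant/blocklists repository
    on codeberg.org and records the blocks (including reject_media and
    reject_reports flags) per listed blocker."""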
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/gardenfence.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/birdsite.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        blockdict = list()

        cnt = 0
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            cnt = cnt + 1
            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if block["blocker"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
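    """Downloads static plain-text block lists (currently seirdy.one's
    bsl.txt) and processes every wanted domain found in them."""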
1134     logger.debug("args[]='%s' - CALLED!", type(args))
1135
1136     logger.debug("Invoking locking.acquire() ...")
1137     locking.acquire()
1138
1139     # Static URLs
1140     urls = ({
1141         "blocker": "seirdy.one",
1142         "url"    : "https://seirdy.one/pb/bsl.txt",
1143     },)
1144
1145     logger.info("Checking %d text file(s) ...", len(urls))
1146     for row in urls:
1147         logger.debug("Fetching row[url]='%s' ...", row["url"])
1148         response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1149
1150         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1151         if response.ok and response.status_code < 300 and response.text != "":
1152             logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1153             domains = response.text.split("\n")
1154
1155             logger.info("Processing %d domains ...", len(domains))
1156             for domain in domains:
1157                 logger.debug("domain='%s' - BEFORE!", domain)
1158                 domain = tidyup.domain(domain)
1159
1160                 logger.debug("domain='%s' - AFTER!", domain)
1161                 if domain == "":
1162                     logger.debug("domain is empty - SKIPPED!")
1163                     continue
1164                 elif not utils.is_domain_wanted(domain):
1165                     logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1166                     continue
1167                 elif instances.is_recent(domain):
1168                     logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1169                     continue
1170
1171                 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1172                 processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1173
1174                 logger.debug("processed='%s'", processed)
1175                 if not processed:
1176                     logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1177                     continue
1178
1179     logger.debug("Success! - EXIT!")
1180     return 0
1181
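# Editor's sketch (not part of fba): the per-line pipeline fetch_txt() applies
# to a downloaded block list, reduced to the stdlib. tidyup.domain() and
# utils.is_domain_wanted() are approximated with deliberately crude stand-ins;
# only the control flow mirrors the loop above.
def _sketch_parse_blocklist_text(text: str) -> list:
    domains = []
    for line in text.split("\n"):
        candidate = line.strip().lower()
        # tidyup.domain() would reduce comments and garbage to ""
        if candidate == "" or candidate.startswith("#"):
            continue
        # crude stand-in for utils.is_domain_wanted()
        if "." not in candidate:
            continue
        domains.append(candidate)
    return domains
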
1182 def fetch_fedipact(args: argparse.Namespace) -> int:
1183     logger.debug("args[]='%s' - CALLED!", type(args))
1184
1185     logger.debug("Invoking locking.acquire() ...")
1186     locking.acquire()
1187
1188     source_domain = "fedipact.online"
1189     if sources.is_recent(source_domain):
1190         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1191         return 0
1192     else:
1193         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1194         sources.update(source_domain)
1195
1196     response = utils.fetch_url(
1197         f"https://{source_domain}",
1198         network.web_headers,
1199         (config.get("connection_timeout"), config.get("read_timeout"))
1200     )
1201
1202     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1203     if response.ok and response.status_code < 300 and response.text != "":
1204         logger.debug("Parsing %d Bytes ...", len(response.text))
1205
1206         doc = bs4.BeautifulSoup(response.text, "html.parser")
1207         logger.debug("doc[]='%s'", type(doc))
1208
1209         rows = doc.findAll("li")
1210         logger.info("Checking %d row(s) ...", len(rows))
1211         for row in rows:
1212             logger.debug("row[]='%s'", type(row))
1213             domain = tidyup.domain(row.contents[0])
1214
1215             logger.debug("domain='%s' - AFTER!", domain)
1216             if domain == "":
1217                 logger.debug("domain is empty - SKIPPED!")
1218                 continue
1219
1220             logger.debug("domain='%s' - BEFORE!", domain)
1221             domain = domain.encode("idna").decode("utf-8")
1222             logger.debug("domain='%s' - AFTER!", domain)
1223
1224             if not utils.is_domain_wanted(domain):
1225                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1226                 continue
1227             elif instances.is_registered(domain):
1228                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1229                 continue
1230             elif instances.is_recent(domain):
1231                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1232                 continue
1233
1234             logger.info("Fetching domain='%s' ...", domain)
1235             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1236
1237     logger.debug("Success! - EXIT!")
1238     return 0
1239
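# Editor's sketch (not part of fba): the scrape-and-normalize step from
# fetch_fedipact(), isolated. bs4 is already imported at the top of this
# module; like the loop above, it assumes each <li> begins with a plain-text
# host name.
def _sketch_list_items_to_domains(html: str) -> list:
    domains = []
    for item in bs4.BeautifulSoup(html, "html.parser").findAll("li"):
        text = str(item.contents[0]).strip().lower() if item.contents else ""
        if text == "":
            continue
        # encode("idna") turns internationalized names into punycode
        domains.append(text.encode("idna").decode("utf-8"))
    return domains
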
1240 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1241     logger.debug("args[]='%s' - CALLED!", type(args))
1242
1243     logger.debug("Invoking locking.acquire() ...")
1244     locking.acquire()
1245
1246     source_domain = "joinfediverse.wiki"
1247     if sources.is_recent(source_domain):
1248         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1249         return 0
1250     else:
1251         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1252         sources.update(source_domain)
1253
1254     raw = utils.fetch_url(
1255         f"https://{source_domain}/FediBlock",
1256         network.web_headers,
1257         (config.get("connection_timeout"), config.get("read_timeout"))
1258     ).text
1259     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1260
1261     doc = bs4.BeautifulSoup(raw, "html.parser")
1262     logger.debug("doc[]='%s'", type(doc))
1263
1264     tables = doc.findAll("table", {"class": "wikitable"})
1265
1266     logger.info("Analyzing %d table(s) ...", len(tables))
1267     blocklist = list()
1268     for table in tables:
1269         logger.debug("table[]='%s'", type(table))
1270
1271         rows = table.findAll("tr")
1272         logger.info("Checking %d row(s) ...", len(rows))
1273         block_headers = dict()
1274         for row in rows:
1275             logger.debug("row[%s]='%s'", type(row), row)
1276
1277             headers = row.findAll("th")
1278             logger.debug("Found headers()=%d header(s)", len(headers))
1279             if len(headers) > 1:
1280                 block_headers = dict()
1281                 cnt = 0
1282                 for header in headers:
1283                     cnt = cnt + 1
1284                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1285                     text = header.contents[0]
1286
1287                     logger.debug("text[]='%s'", type(text))
1288                     if not isinstance(text, str):
1289                         logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1290                         continue
1291                     elif validators.domain(text.strip()):
1292                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1293                         continue
1294
1295                     text = tidyup.domain(text.strip())
1296                     logger.debug("text='%s' - AFTER!", text)
1297                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1298                         logger.debug("Found header: '%s'=%d", text, cnt)
1299                         block_headers[cnt] = text
1300
1301             elif len(block_headers) == 0:
1302                 logger.debug("row is not scrapable - SKIPPED!")
1303                 continue
1304             elif len(block_headers) > 0:
1305                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1306                 cnt = 0
1307                 block = dict()
1308
1309                 for element in row.find_all(["th", "td"]):
1310                     cnt = cnt + 1
1311                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1312                     if cnt in block_headers:
1313                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1314
1315                         text = element.text.strip()
1316                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1317
1318                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1319                         if key == "blocked":
1320                             block[key] = text
1321                         elif key == "block reason(s)":
1322                             block[key] = tidyup.reason(text)
1323                         elif key == "subdomain(s)":
1324                             block[key] = list()
1325                             if text != "":
1326                                 block[key] = text.split("/")
1327                         else:
1328                             logger.debug("key='%s'", key)
1329                             block[key] = text
1330
1331                 logger.debug("block()=%d ...", len(block))
1332                 if len(block) > 0:
1333                     logger.debug("Appending block()=%d ...", len(block))
1334                     blocklist.append(block)
1335
1336     logger.debug("blocklist()=%d", len(blocklist))
1337
1338     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1339     domains = database.cursor.fetchall()
1340
1341     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1342     blocking = list()
1343     for block in blocklist:
1344         logger.debug("block='%s'", block)
1345         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1346             origin = block["blocked"]
1347             logger.debug("origin='%s'", origin)
1348             for subdomain in block["subdomain(s)"]:
1349                 entry = dict(block, blocked=subdomain + "." + origin)
1350                 logger.debug("entry[blocked]='%s'", entry["blocked"])
1351                 blocking.append(entry)
1352         else:
1353             blocking.append(block)
1354
1355     logger.debug("blocking()=%d", len(blocking))
1356     for block in blocking:
1357         logger.debug("block[]='%s'", type(block))
1358         if "blocked" not in block:
1359             raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1360
1361         block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1362         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1363
1364         if block["blocked"] == "":
1365             logger.debug("block[blocked] is empty - SKIPPED!")
1366             continue
1367         elif not utils.is_domain_wanted(block["blocked"]):
1368             logger.warning("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1369             continue
1370         elif instances.is_recent(block["blocked"]):
1371             logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1372             continue
1373
1374         logger.info("Processing blocked='%s' ...", block["blocked"])
1375         processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1376
1377     for blocker in domains:
1378         blocker = blocker[0]
1379         blockdict = list()
1380         logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1381
1382         for block in blocking:
1383             logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1384             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1385
1386             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1387             if block["blocked"] == "":
1388                 logger.debug("block[blocked] is empty - SKIPPED!")
1389                 continue
1390             elif not utils.is_domain_wanted(block["blocked"]):
1391                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1392                 continue
1393
1394             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1395             if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1396                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1397                 blockdict.append({
1398                     "blocked": block["blocked"],
1399                     "reason" : block["reason"],
1400                 })
1401
1402         if instances.has_pending(blocker):
1403             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1404             instances.update_data(blocker)
1405
1406         logger.debug("Invoking commit() ...")
1407         database.connection.commit()
1408
1409         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1410         if config.get("bot_enabled") and len(blockdict) > 0:
1411             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1412             network.send_bot_post(blocker, blockdict)
1413
1414     logger.debug("Success! - EXIT!")
1415     return 0
1416
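# Editor's sketch (not part of fba): the header-position mapping technique
# fetch_joinfediverse() uses to make wikitable columns addressable by name.
# Cells are 1-indexed to match the cnt counter above.
def _sketch_map_headers(header_cells: list, wanted: list) -> dict:
    positions = {}
    for cnt, text in enumerate(header_cells, start=1):
        if text in wanted:
            positions[cnt] = text
    return positions

# _sketch_map_headers(["instance", "subdomain(s)", "block reason(s)"],
#                     ["domain", "instance", "subdomain(s)", "block reason(s)"])
# -> {1: "instance", 2: "subdomain(s)", 3: "block reason(s)"}
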
1417 def recheck_obfuscation(args: argparse.Namespace) -> int:
1418     logger.debug("args[]='%s' - CALLED!", type(args))
1419
1420     logger.debug("Invoking locking.acquire() ...")
1421     locking.acquire()
1422
1423     if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1424         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1425     elif isinstance(args.software, str) and args.software != "":
1426         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1427     else:
1428         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1429
1430     rows = database.cursor.fetchall()
1431     logger.info("Checking %d domains ...", len(rows))
1432     for row in rows:
1433         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1434         if (args.force is None or not args.force) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1435             logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1436             continue
1437
1438         blocking = list()
1439         if row["software"] == "pleroma":
1440             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1441             blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1442         elif row["software"] == "mastodon":
1443             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1444             blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1445         elif row["software"] == "lemmy":
1446             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1447             blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1448         elif row["software"] == "friendica":
1449             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1450             blocking = friendica.fetch_blocks(row["domain"])
1451         elif row["software"] == "misskey":
1452             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1453             blocking = misskey.fetch_blocks(row["domain"])
1454         else:
1455             logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1456
1457         logger.debug("row[domain]='%s'", row["domain"])
1458         # chaos.social requires special care ...
1459         if row["domain"] != "chaos.social":
1460             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1461             instances.set_total_blocks(row["domain"], blocking)
1462
1463         obfuscated = 0
1464         blockdict = list()
1465
1466         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1467         for block in blocking:
1468             logger.debug("block[blocked]='%s'", block["blocked"])
1469             blocked = None
1470
1471             if block["blocked"] == "":
1472                 logger.debug("block[blocked] is empty - SKIPPED!")
1473                 continue
1474             elif block["blocked"].endswith(".arpa"):
1475                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1476                 continue
1477             elif block["blocked"].endswith(".tld"):
1478                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1479                 continue
1480             elif block["blocked"].endswith(".onion"):
1481                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1482                 continue
1483             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1484                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1485                 obfuscated = obfuscated + 1
1486                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1487             elif not utils.is_domain_wanted(block["blocked"]):
1488                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1489                 continue
1490             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1491                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1492                 continue
1493
1494             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1495             if blocked is not None and blocked != block["blocked"]:
1496                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1497                 obfuscated = obfuscated - 1
1498                 if blocks.is_instance_blocked(row["domain"], blocked):
1499                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1500                     continue
1501
1502                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1503
1504                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1505                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1506                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
1507                     blockdict.append({
1508                         "blocked": blocked,
1509                         "reason" : block["reason"],
1510                     })
1511
1512         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1513         if obfuscated == 0 and len(blocking) > 0:
1514             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1515             instances.set_has_obfuscation(row["domain"], False)
1516
1517         if instances.has_pending(row["domain"]):
1518             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1519             instances.update_data(row["domain"])
1520
1521         logger.debug("Invoking commit() ...")
1522         database.connection.commit()
1523
1524         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1525         if config.get("bot_enabled") and len(blockdict) > 0:
1526             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1527             network.send_bot_post(row["domain"], blockdict)
1528
1529     logger.debug("Success! - EXIT!")
1530     return 0
1531
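# Editor's sketch (not part of fba): the classification recheck_obfuscation()
# applies before calling utils.deobfuscate(). An entry counts as obfuscated
# when it carries the wildcard characters seen in censored block lists,
# e.g. "*.example.com" or "examp?e.com".
def _sketch_is_obfuscated(blocked: str) -> bool:
    return blocked.find("*") >= 0 or blocked.find("?") >= 0
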
1532 def fetch_fedilist(args: argparse.Namespace) -> int:
1533     logger.debug("args[]='%s' - CALLED!", type(args))
1534
1535     logger.debug("Invoking locking.acquire() ...")
1536     locking.acquire()
1537
1538     source_domain = "demo.fedilist.com"
1539     if sources.is_recent(source_domain):
1540         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1541         return 0
1542     else:
1543         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1544         sources.update(source_domain)
1545
1546     url = f"http://{source_domain}/instance/csv?onion=not"
1547     if args.software is not None and args.software != "":
1548         logger.debug("args.software='%s'", args.software)
1549         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1550
1551     logger.info("Fetching url='%s' ...", url)
1552     response = reqto.get(
1553         url,
1554         headers=network.web_headers,
1555         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1556         allow_redirects=False
1557     )
1558
1559     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1560     if not response.ok or response.status_code >= 300 or len(response.content) == 0:
1561         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1562         return 1
1563
1564     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1565
1566     logger.debug("reader[]='%s'", type(reader))
1567     for row in reader:
1568         logger.debug("row[]='%s'", type(row))
1569         if "hostname" not in row:
1570             logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1571             continue
1572
1573         logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1574         domain = tidyup.domain(row["hostname"])
1575         logger.debug("domain='%s' - AFTER!", domain)
1576
1577         if domain == "":
1578             logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1579             continue
1580
1581         logger.debug("domain='%s' - BEFORE!", domain)
1582         domain = domain.encode("idna").decode("utf-8")
1583         logger.debug("domain='%s' - AFTER!", domain)
1584
1585         if not utils.is_domain_wanted(domain):
1586             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1587             continue
1588         elif (args.force is None or not args.force) and instances.is_registered(domain):
1589             logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1590             continue
1591         elif instances.is_recent(domain):
1592             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1593             continue
1594
1595         logger.info("Fetching instances from domain='%s' ...", domain)
1596         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1597
1598     logger.debug("Success! - EXIT!")
1599     return 0
1600
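# Editor's sketch (not part of fba): decoding a fedilist-style CSV export the
# way fetch_fedilist() does. csv is already imported at the top of this
# module; the "hostname" column name matches what the loop above expects.
def _sketch_hostnames_from_csv(payload: bytes) -> list:
    reader = csv.DictReader(payload.decode("utf-8").splitlines(), dialect="unix")
    return [
        row["hostname"].strip().lower()
        for row in reader
        if row.get("hostname")
    ]
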
1601 def update_nodeinfo(args: argparse.Namespace) -> int:
1602     logger.debug("args[]='%s' - CALLED!", type(args))
1603
1604     logger.debug("Invoking locking.acquire() ...")
1605     locking.acquire()
1606
1607     if args.domain is not None and args.domain != "":
1608         logger.debug("Fetching args.domain='%s'", args.domain)
1609         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1610     elif args.software is not None and args.software != "":
1611         logger.info("Fetching domains for args.software='%s'", args.software)
1612         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1613     else:
1614         logger.info("Fetching domains for recently updated ...")
1615         database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1616
1617     domains = database.cursor.fetchall()
1618
1619     logger.info("Checking %d domain(s) ...", len(domains))
1620     cnt = 0
1621     for row in domains:
1622         logger.debug("row[]='%s'", type(row))
1623         try:
1624             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1625             software = federation.determine_software(row["domain"])
1626
1627             logger.debug("Determined software='%s'", software)
1628             if (software != row["software"] and software is not None) or args.force is True:
1629                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1630                 instances.set_software(row["domain"], software)
1631
1632             instances.set_success(row["domain"])
1633         except network.exceptions as exception:
1634             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1635             instances.set_last_error(row["domain"], exception)
1636
1637         instances.set_last_nodeinfo(row["domain"])
1638         instances.update_data(row["domain"])
1639         cnt = cnt + 1
1640
1641     logger.debug("Success! - EXIT!")
1642     return 0
1643
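# Editor's sketch (not part of fba): the percentage string built inline in
# update_nodeinfo()'s logger.info() call, isolated with a guard for an empty
# result set (the loop above never divides by zero because it only runs when
# rows exist).
def _sketch_progress(cnt: int, total: int) -> str:
    return "{:5.1f}".format(cnt / total * 100) if total > 0 else "  0.0"
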
1644 def fetch_instances_social(args: argparse.Namespace) -> int:
1645     logger.debug("args[]='%s' - CALLED!", type(args))
1646
1647     logger.debug("Invoking locking.acquire() ...")
1648     locking.acquire()
1649
1650     source_domain = "instances.social"
1651
1652     if config.get("instances_social_api_key") == "":
1653         logger.error("API key not set. Please set in your config.json file.")
1654         return 1
1655     elif sources.is_recent(source_domain):
1656         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1657         return 0
1658     else:
1659         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1660         sources.update(source_domain)
1661
1662     headers = {
1663         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1664     }
1665
1666     fetched = network.get_json_api(
1667         source_domain,
1668         "/api/1.0/instances/list?count=0&sort_by=name",
1669         headers,
1670         (config.get("connection_timeout"), config.get("read_timeout"))
1671     )
1672     logger.debug("fetched[]='%s'", type(fetched))
1673
1674     if "error_message" in fetched:
1675         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1676         return 2
1677     elif "exception" in fetched:
1678         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1679         return 3
1680     elif "json" not in fetched:
1681         logger.warning("fetched has no element 'json' - EXIT!")
1682         return 4
1683     elif "instances" not in fetched["json"]:
1684         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1685         return 5
1686
1687     domains = list()
1688     rows = fetched["json"]["instances"]
1689
1690     logger.info("Checking %d row(s) ...", len(rows))
1691     for row in rows:
1692         logger.debug("row[]='%s'", type(row))
1693         domain = tidyup.domain(row["name"])
1694         logger.debug("domain='%s' - AFTER!", domain)
1695
1696         if domain == "":
1697             logger.debug("domain is empty - SKIPPED!")
1698             continue
1699
1700         logger.debug("domain='%s' - BEFORE!", domain)
1701         domain = domain.encode("idna").decode("utf-8")
1702         logger.debug("domain='%s' - AFTER!", domain)
1703
1704         if not utils.is_domain_wanted(domain):
1705             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1706             continue
1707         elif domain in domains:
1708             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1709             continue
1710         elif instances.is_registered(domain):
1711             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1712             continue
1713         elif instances.is_recent(domain):
1714             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1715             continue
1716
1717         logger.info("Fetching instances from domain='%s'", domain)
1718         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1719
1720     logger.debug("Success! - EXIT!")
1721     return 0
1722
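# Editor's sketch (not part of fba): the error cascade fetch_instances_social()
# runs over network.get_json_api()'s result dict, distilled into one helper.
# The keys and return codes mirror the checks above.
def _sketch_api_status(fetched: dict) -> int:
    if "error_message" in fetched:
        return 2
    if "exception" in fetched:
        return 3
    if "json" not in fetched:
        return 4
    if "instances" not in fetched["json"]:
        return 5
    return 0
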
1723 def convert_idna(args: argparse.Namespace) -> int:
1724     logger.debug("args[]='%s' - CALLED!", type(args))
1725
1726     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1727     rows = database.cursor.fetchall()
1728
1729     logger.debug("rows[]='%s'", type(rows))
1730     instances.translate_idnas(rows, "domain")
1731
1732     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1733     rows = database.cursor.fetchall()
1734
1735     logger.debug("rows[]='%s'", type(rows))
1736     instances.translate_idnas(rows, "origin")
1737
1738     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1739     rows = database.cursor.fetchall()
1740
1741     logger.debug("rows[]='%s'", type(rows))
1742     blocks.translate_idnas(rows, "blocker")
1743
1744     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1745     rows = database.cursor.fetchall()
1746
1747     logger.debug("rows[]='%s'", type(rows))
1748     blocks.translate_idnas(rows, "blocked")
1749
1750     logger.debug("Success! - EXIT!")
1751     return 0
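
# Editor's sketch (not part of fba): the single-value punycode translation
# that convert_idna() delegates to instances.translate_idnas() and
# blocks.translate_idnas(), using only the stdlib codec.
def _sketch_to_punycode(domain: str) -> str:
    # e.g. "bücher.example" -> "xn--bcher-kva.example"
    return domain.encode("idna").decode("utf-8")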