# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
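    """Sanity-check a single domain: it must be syntactically valid, not
    blacklisted and not yet registered. Returns 0 when the domain is still
    unknown (and can be added), else 100 (invalid), 101 (blacklisted) or
    102 (already registered)."""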
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
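    """Fetches the public server list from the pixelfed.org API and runs
    federation.fetch_instances() for every new, wanted and not recently
    crawled domain. Returns 0 on success, 100-103 on the error paths below."""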
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = dict()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

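        # get_json_api() is assumed to wrap the decoded body as {"json": ...}
        # and to set an "error_message" key on failure - inferred from the
        # checks below, not from the network module's documentation.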
        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch pixelfed.org API, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
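    """Fetches a domain list via the GraphQL API at gql.api.bka.li and adds
    all new, wanted domains as instances. Returns 0 on success, 100-102 on
    the error paths below."""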
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

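        # Expected response shape (an assumption inferred from the checks
        # below):
        #   {"json": {"data": {"nodeinfo": [{"domain": "..."}, ...]}}}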
        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
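    """Fetches and stores block lists, either for a single domain
    (args.domain), for all instances of one software (args.software), or for
    every supported instance whose last_blocked timestamp is older than the
    configured recheck_block interval. Obfuscated entries (containing '*' or
    '?') are resolved via instances.deobfuscate() where possible."""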
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        blockdict = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", blocker, block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
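    """Scrapes fediverse.observer: reads the software drop-down menu, then
    fetches one table of instances per software type and registers every new,
    wanted domain."""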
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Acquire lock
    locking.acquire()

    logger.info("Fetching software list ...")
    raw = utils.fetch_url(
        "https://fediverse.observer",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw[%s]()=%d", type(raw), len(raw))

    doc = bs4.BeautifulSoup(raw, features="html.parser")
    logger.debug("doc[]='%s'", type(doc))

    items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
    logger.debug("items[]='%s'", type(items))

    types = list()

    logger.info("Checking %d menu items ...", len(items))
    for item in items:
        logger.debug("item[%s]='%s'", type(item), item)
        if item.text.lower() == "all":
            logger.debug("Skipping 'All' menu entry ...")
            continue

        logger.debug("Appending item.text='%s' ...", item.text)
        types.append(tidyup.domain(item.text))

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        # Resolve software aliases once per software type instead of once per item
        software = software_helper.alias(software)

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
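    """Scrapes https://wiki.todon.eu/todon/domainblocks and stores the listed
    domains as 'silenced' and 'reject' blocks by todon.eu."""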
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'todon.eu', None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='todon.eu' ...", blocked, block_level)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
            network.send_bot_post("todon.eu", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
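    """Fetches chaos.social's federation.md from GitHub, renders the Markdown
    and stores the 'silenced' and 'blocked' tables as blocks by
    chaos.social."""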
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    blockdict = list()
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='chaos.social' ...", row["domain"], block_level)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
            network.send_bot_post("chaos.social", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
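    """Fetches an FBA-specific RSS feed (args.feed), extracts one domain per
    item from the link's query string and registers all new, wanted
    domains."""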
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
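    """Fetches the Atom feed of the FBA bot account on ryona.agency and
    registers every new, wanted domain linked from the feed entries."""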
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
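    """Fetches the peer list of args.domain and, unless args.single is set,
    also re-fetches all known instances of supported software whose
    last_instance_fetch is older than the configured recheck_instance
    interval."""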
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
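    """Downloads the CSV block lists from oliphant's blocklists repository on
    Codeberg and imports each row as a 'reject' block (plus 'reject_media'
    and 'reject_reports' where flagged) for the respective blocker."""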
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if not response.ok or response.status_code > 399 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

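        # The CSV files typically use Mastodon-style columns, sometimes
        # prefixed with '#' (illustrative sample, not actual data):
        #
        #   #domain,#severity,#reject_media,#reject_reports
        #   example.com,suspend,true,false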
        logger.debug("reader[]='%s'", type(reader))
        blockdict = list()
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False
            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = row["#severity"]
            elif "severity" in row:
                severity = row["severity"]
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s' for blocker='%s' ...", domain, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                utils.process_block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
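    """Downloads plain-text block lists (one domain per line), currently only
    seirdy.one's bsl.txt, and runs every wanted domain through
    utils.process_domain()."""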
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
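    """Scrapes the instance list from https://fedipact.online and registers
    every new, wanted domain."""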
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinfediverse(args: argparse.Namespace) -> int:
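    """Scrapes the FediBlock tables on joinfediverse.wiki, expands listed
    subdomains into full block records and stores them as 'reject' blocks
    for all known climatejustice.* instances."""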
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s'", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key == "blocked":
                            block[key] = text
                        elif key == "subdomain(s)":
                            block[key] = list()
                            if text != "":
                                block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            for subdomain in block["subdomain(s)"]:
                # Append a copy per subdomain; appending the same mutated dict
                # would leave every list entry pointing at the last subdomain
                blocking.append({**block, "blocked": subdomain + "." + origin})
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        block["blocked"] = tidyup.domain(block["blocked"])

        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(block["blocked"]):
            logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Processing blocked='%s' ...", block["blocked"])
        utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
        blockdict = list()

        for block in blocking:
            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
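    """Re-fetches block lists from instances flagged with has_obfuscation and
    tries to resolve obfuscated entries (containing '*' or '?') into real
    domain names; instances whose lists deobfuscate completely get the flag
    cleared again."""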
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])

        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        obfuscated = 0
        blockdict = list()
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = utils.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0