# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
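    """Checks whether args.domain is a valid, not blacklisted and not yet
    registered domain. Returns 0 when the domain is still unknown, otherwise
    a status code between 100 and 102."""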
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
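    """Fetches the public server directory from pixelfed.org's API and feeds
    every new, wanted domain into federation.fetch_instances(). Returns 0 on
    success, otherwise a non-zero status code."""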
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token by default; there is no need to add network.api_headers manually here
    headers = tuple()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch pixelfed.org API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
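    """Fetches the domain list from the gql.api.bka.li GraphQL API and adds
    all new, wanted domains as instances. Returns 0 on success."""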
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows[data]()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
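    """Fetches block lists, either from a single domain (args.domain), from
    all instances of a given software (args.software) or from all instances
    whose last check is older than the configured interval. Records the
    blocks and optionally announces new 'reject' blocks via the bot."""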
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        blockdict = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
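    """Scrapes fediverse.observer: first the software drop-down menu, then
    the table data per software type, and registers every new, wanted
    domain found there."""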
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Acquire lock
    locking.acquire()

    logger.info("Fetching software list ...")
    raw = utils.fetch_url(
        "https://fediverse.observer",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw[%s]()=%d", type(raw), len(raw))

    doc = bs4.BeautifulSoup(raw, features="html.parser")
    logger.debug("doc[]='%s'", type(doc))

    items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
    logger.debug("items[]='%s'", type(items))

    types = list()

    logger.info("Checking %d menu items ...", len(items))
    for item in items:
        logger.debug("item[%s]='%s'", type(item), item)
        if item.text.lower() == "all":
            logger.debug("Skipping 'All' menu entry ...")
            continue

        logger.debug("Appending item.text='%s' ...", item.text)
        types.append(tidyup.domain(item.text))

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)

        # Map the observer's software name to FBA's canonical name once, not per list item
        software = software_helper.alias(software)

        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been handled recently - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
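    """Scrapes the silenced/limited and suspended server lists from the
    todon.eu wiki and records them as blocks for blocker 'todon.eu'."""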
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, "todon.eu", None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='todon.eu' ...", blocked, block_level)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
        network.send_bot_post("todon.eu", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
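    """Parses chaos.social's federation.md (rendered from Markdown) and
    records the silenced and blocked instances as blocks for blocker
    'chaos.social'."""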
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    blockdict = list()
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], row["reason"])
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
            network.send_bot_post("chaos.social", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
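    """Parses an FBA-specific RSS feed (args.feed) and registers all new,
    wanted domains found in the item links."""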
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
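    """Parses the Atom feed of the FBA bot account on ryona.agency and
    registers all new, wanted domains linked from its posts."""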
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
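    """Fetches peers from args.domain and, unless --single is given, also
    re-crawls all known instances whose last fetch is older than the
    configured 'recheck_instance' interval."""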
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row["domain"])
        if not utils.is_domain_wanted(row["domain"]):
            logger.debug("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
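    """Downloads the CSV block lists from oliphant's blocklists repository
    on codeberg.org and records the entries as blocks for each listed
    blocker."""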
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if not response.ok or response.status_code > 399 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

        logger.debug("reader[]='%s'", type(reader))
        blockdict = list()
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False
            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = row["#severity"]
            elif "severity" in row:
                severity = row["severity"]
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='reject' for blocker='%s' ...", domain, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                utils.process_block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
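    """Downloads static plain-text block lists (currently seirdy.one's
    bsl.txt) and processes every listed domain."""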
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
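    """Scrapes the list of pact signers from fedipact.online and registers
    all new, wanted domains."""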
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinfediverse(args: argparse.Namespace) -> int:
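    """Scrapes the FediBlock tables from joinfediverse.wiki and records the
    entries as blocks for the climatejustice.* instances found in the
    database."""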
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s'", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key == "subdomain(s)":
                            block[key] = text.split("/") if text != "" else list()
                        else:
                            # "domain"/"instance" columns have been mapped to key "blocked" above
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            for subdomain in block["subdomain(s)"]:
                # Append a copy per subdomain, otherwise all entries would share the same dict
                entry = block.copy()
                entry["blocked"] = subdomain + "." + origin
                blocking.append(entry)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        block["blocked"] = tidyup.domain(block["blocked"])

        if not utils.is_domain_wanted(block["blocked"]):
            logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Processing blocked='%s' ...", block["blocked"])
        utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
        blockdict = list()

        for block in blocking:
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
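    """Re-fetches block lists from instances flagged with has_obfuscation
    and tries to deobfuscate wildcard ('*') and question-mark ('?') entries.
    Clears the flag once a block list is fully deobfuscated."""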
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])

        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        obfuscated = 0
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s'", block["blocked"])
            blocked = None

            if block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = utils.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0