# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

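# Note: each command below returns 0 on success and a distinct non-zero
# status code on failure, so the result can be used as an exit code.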
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

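# Fetches the public server directory from pixelfed.org's API and queues
# every new, wanted domain for a full instance fetch.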
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = dict()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON from pixelfed.org API, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

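# Queries the GraphQL endpoint at gql.api.bka.li for its list of known
# domains and registers every new, wanted one.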
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error_message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql, exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

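# Imports the federation block lists of registered instances, either for a
# single domain/software given on the command line or for all instances
# whose last check is older than the configured recheck_block interval.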
def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)
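
        # Each supported software exposes its block list differently, so
        # dispatch to the matching software-specific fetch_blocks() helper.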
        blocking = list()
        blockdict = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config[bot_enabled]='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

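# Scrapes fediverse.observer: first the software drop-down menu, then the
# per-software table data, queueing every new, wanted domain found there.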
def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Acquire lock
    locking.acquire()

    logger.info("Fetching software list ...")
    raw = utils.fetch_url(
        "https://fediverse.observer",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw[%s]()=%d", type(raw), len(raw))

    doc = bs4.BeautifulSoup(raw, features="html.parser")
    logger.debug("doc[]='%s'", type(doc))

    items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
    logger.debug("items[]='%s'", type(items))

    types = list()

    logger.info("Checking %d menu items ...", len(items))
    for item in items:
        logger.debug("item[%s]='%s'", type(item), item)
        if item.text.lower() == "all":
            logger.debug("Skipping 'All' menu entry ...")
            continue

        logger.debug("Appending item.text='%s' ...", item.text)
        types.append(tidyup.domain(item.text))

    logger.info("Fetching %d different table data ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        software = software_helper.alias(software)

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s'", domain)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

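# Imports the silenced/limited and suspended server lists published at
# wiki.todon.eu/todon/domainblocks as blocks issued by todon.eu.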
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'todon.eu', None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='todon.eu' ...", blocked, block_level)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    if config.get("bot_enabled") and len(blockdict) > 0:
        logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
        network.send_bot_post("todon.eu", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

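# Imports chaos.social's block list, which is published as a Markdown
# document (federation.md) in their meta repository.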
def fetch_cs(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    blockdict = list()
    if len(domains) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], row["reason"])
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
            network.send_bot_post("chaos.social", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

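# Parses an FBA-generated RSS feed (URL given as args.feed) and queues every
# new, wanted domain found in the item links.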
def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = item.link.split("=")[1]

            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

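# Parses the Atom feed of the FBA bot account on ryona.agency and extracts
# domains from the links embedded in each entry.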
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s'", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s'", domain)
                    if not utils.is_domain_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

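# Crawls args.domain first and then, unless args.single is set, re-fetches
# all registered instances whose last peer fetch is older than the
# configured recheck_instance interval.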
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("domain='%s'", row["domain"])
        if not utils.is_domain_wanted(row["domain"]):
            logger.debug("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0

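# Downloads the CSV block lists maintained in oliphant's blocklists
# repository on Codeberg and imports them per contributing blocker.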
def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is domain given and not equal blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if not response.ok or response.status_code > 399 or response.text == "":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

        logger.debug("reader[]='%s'", type(reader))
        blockdict = list()
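
        # Some of the source CSV files prefix their column headers with '#'
        # (e.g. '#domain', '#severity'), so each field is looked up in both variants.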
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False
            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = row["#severity"]
            elif "severity" in row:
                severity = row["severity"]
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s' for blocker='%s' ...", domain, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                utils.process_block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

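# Fetches plain-text block lists (one domain per line); currently only
# seirdy.one's published list is configured.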
def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s'", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

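# Scrapes the participant list from fedipact.online and queues every new,
# wanted domain for an instance fetch.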
def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s'", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

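# Scrapes the FediBlock wiki tables on joinfediverse.wiki, expands listed
# subdomains and records the blocks for all registered climatejustice.* instances.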
def fetch_joinfediverse(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s'", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key in ["domain", "instance"]:
                            block[key] = text
                        elif key == "reason":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            block[key] = list()
                            if text != "":
                                block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
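
    # A wiki row may list several subdomains; expand each one into its own
    # entry (on a copy) so they can be processed as individual blocked domains.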
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            for subdomain in block["subdomain(s)"]:
                subdomain_block = block.copy()
                subdomain_block["blocked"] = subdomain + "." + origin
                blocking.append(subdomain_block)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        block["blocked"] = tidyup.domain(block["blocked"])

        if not utils.is_domain_wanted(block["blocked"]):
            logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Processing blocked='%s' ...", block["blocked"])
        utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        for block in blocking:
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            if not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

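# Re-fetches block lists of instances flagged with has_obfuscation and tries
# to resolve obfuscated entries (containing '*' or '?') into real domains.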
def recheck_obfuscation(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])

        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        obfuscated = 0
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s'", block["blocked"])
            blocked = None

            if block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = utils.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0