]> git.mxchange.org Git - fba.git/blob - fba/commands.py
5a59379da16571518780777fdd174aab5c476f33
[fba.git] / fba / commands.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import csv
18 import inspect
19 import json
20 import logging
21 import time
22
23 import argparse
24 import atoma
25 import bs4
26 import markdown
27 import reqto
28 import validators
29
30 from fba import csrf
31 from fba import database
32 from fba import utils
33
34 from fba.helpers import blacklist
35 from fba.helpers import config
36 from fba.helpers import cookies
37 from fba.helpers import locking
38 from fba.helpers import software as software_helper
39 from fba.helpers import tidyup
40
41 from fba.http import federation
42 from fba.http import network
43
44 from fba.models import blocks
45 from fba.models import instances
46
47 from fba.networks import friendica
48 from fba.networks import lemmy
49 from fba.networks import mastodon
50 from fba.networks import misskey
51 from fba.networks import pleroma
52
53 logging.basicConfig(level=logging.INFO)
54 logger = logging.getLogger(__name__)
55 #logger.setLevel(logging.DEBUG)
56
57 def check_instance(args: argparse.Namespace) -> int:
58     logger.debug("args.domain='%s' - CALLED!", args.domain)
59     status = 0
60     if not validators.domain(args.domain):
61         logger.warning("args.domain='%s' is not valid", args.domain)
62         status = 100
63     elif blacklist.is_blacklisted(args.domain):
64         logger.warning("args.domain='%s' is blacklisted", args.domain)
65         status = 101
66     elif instances.is_registered(args.domain):
67         logger.warning("args.domain='%s' is already registered", args.domain)
68         status = 102
69     else:
70         logger.info("args.domain='%s' is not known", args.domain)
71
72     logger.debug("status=%d - EXIT!", status)
73     return status
74
75 def check_nodeinfo(args: argparse.Namespace) -> int:
76     logger.debug("args[]='%s' - CALLED!", type(args))
77
78     # Fetch rows
79     database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
80
81     cnt = 0
82     for row in database.cursor.fetchall():
83         logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
84         punycode = row["domain"].encode("idna").decode("utf-8")
85
86         if row["nodeinfo_url"].startswith("/"):
87             logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
88             continue
89         elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
90             logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
91             cnt = cnt + 1
92
93     logger.info("Found %d row(s)", cnt)
94
95     logger.debug("EXIT!")
96     return 0
97
98 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
99     logger.debug("args[]='%s' - CALLED!", type(args))
100
101     # No CSRF by default, you don't have to add network.api_headers by yourself here
102     headers = tuple()
103
104     try:
105         logger.debug("Checking CSRF from pixelfed.org")
106         headers = csrf.determine("pixelfed.org", dict())
107     except network.exceptions as exception:
108         logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
109         return list()
110
111     try:
112         logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
113         fetched = network.get_json_api(
114             "pixelfed.org",
115             "/api/v1/servers/all.json?scope=All&country=all&language=all",
116             headers,
117             (config.get("connection_timeout"), config.get("read_timeout"))
118         )
119
120         logger.debug("JSON API returned %d elements", len(fetched))
121         if "error_message" in fetched:
122             logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
123             return 101
124         elif "data" not in fetched["json"]:
125             logger.warning("API did not return JSON with 'data' element - EXIT!")
126             return 102
127
128         rows = fetched["json"]["data"]
129         logger.info("Checking %d fetched rows ...", len(rows))
130         for row in rows:
131             logger.debug("row[]='%s'", type(row))
132             if "domain" not in row:
133                 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
134                 continue
135             elif row["domain"] == "":
136                 logger.debug("row[domain] is empty - SKIPPED!")
137                 continue
138             elif not utils.is_domain_wanted(row["domain"]):
139                 logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
140                 continue
141             elif instances.is_registered(row["domain"]):
142                 logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
143                 continue
144             elif instances.is_recent(row["domain"]):
145                 logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
146                 continue
147
148             logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
149             federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
150
151     except network.exceptions as exception:
152         logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
153         return 103
154
155     logger.debug("Success! - EXIT!")
156     return 0
157
158 def fetch_bkali(args: argparse.Namespace) -> int:
159     logger.debug("args[]='%s' - CALLED!", type(args))
160     domains = list()
161     try:
162         fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
163             "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
164         }))
165
166         logger.debug("fetched[]='%s'", type(fetched))
167         if "error_message" in fetched:
168             logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
169             return 100
170         elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
171             logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
172             return 101
173
174         rows = fetched["json"]
175
176         logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
177         if len(rows) == 0:
178             raise Exception("WARNING: Returned no records")
179         elif "data" not in rows:
180             raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
181         elif "nodeinfo" not in rows["data"]:
182             raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
183
184         for entry in rows["data"]["nodeinfo"]:
185             logger.debug("entry[%s]='%s'", type(entry), entry)
186             if "domain" not in entry:
187                 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
188                 continue
189             elif entry["domain"] == "":
190                 logger.debug("entry[domain] is empty - SKIPPED!")
191                 continue
192             elif not utils.is_domain_wanted(entry["domain"]):
193                 logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
194                 continue
195             elif instances.is_registered(entry["domain"]):
196                 logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
197                 continue
198             elif instances.is_recent(entry["domain"]):
199                 logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
200                 continue
201
202             logger.debug("Adding domain='%s' ...", entry["domain"])
203             domains.append(entry["domain"])
204
205     except network.exceptions as exception:
206         logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
207         return 102
208
209     logger.debug("domains()=%d", len(domains))
210     if len(domains) > 0:
211         locking.acquire()
212
213         logger.info("Adding %d new instances ...", len(domains))
214         for domain in domains:
215             try:
216                 logger.info("Fetching instances from domain='%s' ...", domain)
217                 federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
218             except network.exceptions as exception:
219                 logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
220                 instances.set_last_error(domain, exception)
221                 return 100
222
223     logger.debug("Success - EXIT!")
224     return 0
225
226 def fetch_blocks(args: argparse.Namespace) -> int:
227     logger.debug("args[]='%s' - CALLED!", type(args))
228     if args.domain is not None and args.domain != "":
229         logger.debug("args.domain='%s' - checking ...", args.domain)
230         if not validators.domain(args.domain):
231             logger.warning("args.domain='%s' is not valid.", args.domain)
232             return 100
233         elif blacklist.is_blacklisted(args.domain):
234             logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
235             return 101
236         elif not instances.is_registered(args.domain):
237             logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
238             return 102
239
240     locking.acquire()
241
242     if args.domain is not None and args.domain != "":
243         # Re-check single domain
244         logger.debug("Querying database for single args.domain='%s' ...", args.domain)
245         database.cursor.execute(
246             "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
247         )
248     elif args.software is not None and args.software != "":
249         # Re-check single software
250         logger.debug("Querying database for args.software='%s' ...", args.software)
251         database.cursor.execute(
252             "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
253         )
254     else:
255         # Re-check after "timeout" (aka. minimum interval)
256         database.cursor.execute(
257             "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
258         )
259
260     rows = database.cursor.fetchall()
261     logger.info("Checking %d entries ...", len(rows))
262     for blocker, software, origin, nodeinfo_url in rows:
263         logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
264         blocker = tidyup.domain(blocker)
265         logger.debug("blocker='%s' - AFTER!", blocker)
266
267         if blocker == "":
268             logger.warning("blocker is now empty!")
269             continue
270         elif nodeinfo_url is None or nodeinfo_url == "":
271             logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
272             continue
273         elif not utils.is_domain_wanted(blocker):
274             logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
275             continue
276
277         logger.debug("blocker='%s'", blocker)
278         instances.set_last_blocked(blocker)
279         instances.set_has_obfuscation(blocker, False)
280
281         blocking = list()
282         blockdict = list()
283         if software == "pleroma":
284             logger.info("blocker='%s',software='%s'", blocker, software)
285             blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
286         elif software == "mastodon":
287             logger.info("blocker='%s',software='%s'", blocker, software)
288             blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
289         elif software == "lemmy":
290             logger.info("blocker='%s',software='%s'", blocker, software)
291             blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
292         elif software == "friendica":
293             logger.info("blocker='%s',software='%s'", blocker, software)
294             blocking = friendica.fetch_blocks(blocker)
295         elif software == "misskey":
296             logger.info("blocker='%s',software='%s'", blocker, software)
297             blocking = misskey.fetch_blocks(blocker)
298         else:
299             logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
300
301         logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
302         instances.set_total_blocks(blocker, blocking)
303
304         logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
305         blockdict = list()
306         for block in blocking:
307             logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
308
309             if block["block_level"] == "":
310                 logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
311                 continue
312
313             logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
314             block["blocked"] = tidyup.domain(block["blocked"])
315             block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
316             logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
317
318             if block["blocked"] == "":
319                 logger.warning("blocked is empty, blocker='%s'", blocker)
320                 continue
321             elif block["blocked"].endswith(".onion"):
322                 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
323                 continue
324             elif block["blocked"].endswith(".arpa"):
325                 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
326                 continue
327             elif block["blocked"].endswith(".tld"):
328                 logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
329                 continue
330             elif block["blocked"].find("*") >= 0:
331                 logger.debug("blocker='%s' uses obfuscated domains", blocker)
332
333                 # Some friendica servers also obscure domains without hash
334                 row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
335
336                 logger.debug("row[]='%s'", type(row))
337                 if row is None:
338                     logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
339                     instances.set_has_obfuscation(blocker, True)
340                     continue
341
342                 block["blocked"] = row["domain"]
343                 origin           = row["origin"]
344                 nodeinfo_url     = row["nodeinfo_url"]
345             elif block["blocked"].find("?") >= 0:
346                 logger.debug("blocker='%s' uses obfuscated domains", blocker)
347
348                 # Some obscure them with question marks, not sure if that's dependent on version or not
349                 row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
350
351                 logger.debug("row[]='%s'", type(row))
352                 if row is None:
353                     logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
354                     instances.set_has_obfuscation(blocker, True)
355                     continue
356
357                 block["blocked"] = row["domain"]
358                 origin           = row["origin"]
359                 nodeinfo_url     = row["nodeinfo_url"]
360
361             logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
362             if block["blocked"] == "":
363                 logger.debug("block[blocked] is empty - SKIPPED!")
364                 continue
365             elif not utils.is_domain_wanted(block["blocked"]):
366                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
367                 continue
368             elif block["block_level"] in ["accept", "accepted"]:
369                 logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
370                 continue
371             elif not instances.is_registered(block["blocked"]):
372                 logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
373                 federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
374
375             block["block_level"] = utils.alias_block_level(block["block_level"])
376
377             if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
378                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
379                 blockdict.append({
380                     "blocked": block["blocked"],
381                     "reason" : block["reason"],
382                 })
383
384             logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
385             cookies.clear(block["blocked"])
386
387         logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
388         if instances.has_pending(blocker):
389             logger.debug("Flushing updates for blocker='%s' ...", blocker)
390             instances.update_data(blocker)
391
392         logger.debug("Invoking commit() ...")
393         database.connection.commit()
394
395         logger.debug("Invoking cookies.clear(%s) ...", blocker)
396         cookies.clear(blocker)
397
398         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
399         if config.get("bot_enabled") and len(blockdict) > 0:
400             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
401             network.send_bot_post(blocker, blockdict)
402
403     logger.debug("Success! - EXIT!")
404     return 0
405
406 def fetch_observer(args: argparse.Namespace) -> int:
407     logger.debug("args[]='%s' - CALLED!", type(args))
408
409     # Acquire lock
410     locking.acquire()
411
412     types = list()
413     if args.software is None:
414         logger.info("Fetching software list ...")
415         raw = utils.fetch_url(
416             "https://fediverse.observer",
417             network.web_headers,
418             (config.get("connection_timeout"), config.get("read_timeout"))
419         ).text
420         logger.debug("raw[%s]()=%d", type(raw), len(raw))
421
422         doc = bs4.BeautifulSoup(raw, features="html.parser")
423         logger.debug("doc[]='%s'", type(doc))
424
425         items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
426         logger.debug("items[]='%s'", type(items))
427
428         logger.info("Checking %d menu items ...", len(items))
429         for item in items:
430             logger.debug("item[%s]='%s'", type(item), item)
431             if item.text.lower() == "all":
432                 logger.debug("Skipping 'All' menu entry ...")
433                 continue
434
435             logger.debug("Appending item.text='%s' ...", item.text)
436             types.append(tidyup.domain(item.text))
437     else:
438         logger.info("Adding args.software='%s' as type ...", args.software)
439         types.append(args.software)
440
441     logger.info("Fetching %d different table data ...", len(types))
442     for software in types:
443         logger.debug("software='%s' - BEFORE!", software)
444         if args.software is not None and args.software != software:
445             logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
446             continue
447
448         doc = None
449         try:
450             logger.debug("Fetching table data for software='%s' ...", software)
451             raw = utils.fetch_url(
452                 f"https://fediverse.observer/app/views/tabledata.php?software={software}",
453                 network.web_headers,
454                 (config.get("connection_timeout"), config.get("read_timeout"))
455             ).text
456             logger.debug("raw[%s]()=%d", type(raw), len(raw))
457
458             doc = bs4.BeautifulSoup(raw, features="html.parser")
459             logger.debug("doc[]='%s'", type(doc))
460         except network.exceptions as exception:
461             logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
462             continue
463
464         items = doc.findAll("a", {"class": "url"})
465         logger.info("Checking %d items,software='%s' ...", len(items), software)
466         for item in items:
467             logger.debug("item[]='%s'", type(item))
468             domain = item.decode_contents()
469
470             logger.debug("domain='%s' - AFTER!", domain)
471             if domain == "":
472                 logger.debug("domain is empty - SKIPPED!")
473                 continue
474             elif not utils.is_domain_wanted(domain):
475                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
476                 continue
477             elif instances.is_registered(domain):
478                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
479                 continue
480             elif instances.is_recent(domain):
481                 logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
482                 continue
483
484             software = software_helper.alias(software)
485             logger.info("Fetching instances for domain='%s'", domain)
486             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
487
488     logger.debug("Success! - EXIT!")
489     return 0
490
491 def fetch_todon_wiki(args: argparse.Namespace) -> int:
492     logger.debug("args[]='%s' - CALLED!", type(args))
493
494     locking.acquire()
495     blocklist = {
496         "silenced": list(),
497         "reject": list(),
498     }
499
500     raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
501     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
502
503     doc = bs4.BeautifulSoup(raw, "html.parser")
504     logger.debug("doc[]='%s'", type(doc))
505
506     silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
507     logger.info("Checking %d silenced/limited entries ...", len(silenced))
508     blocklist["silenced"] = utils.find_domains(silenced, "div")
509
510     suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
511     logger.info("Checking %d suspended entries ...", len(suspended))
512     blocklist["reject"] = utils.find_domains(suspended, "div")
513
514     blocking = blocklist["silenced"] + blocklist["reject"]
515     blocker = "todon.eu"
516
517     logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
518     instances.set_total_blocks(blocker, blocking)
519
520     blockdict = list()
521     for block_level in blocklist:
522         blockers = blocklist[block_level]
523
524         logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
525         for blocked in blockers:
526             logger.debug("blocked='%s'", blocked)
527
528             if not instances.is_registered(blocked):
529                 try:
530                     logger.info("Fetching instances from domain='%s' ...", blocked)
531                     federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
532                 except network.exceptions as exception:
533                     logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
534                     instances.set_last_error(blocked, exception)
535
536             if blocks.is_instance_blocked(blocker, blocked, block_level):
537                 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
538                 continue
539
540             logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
541             if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
542                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
543                 blockdict.append({
544                     "blocked": blocked,
545                     "reason" : None,
546                 })
547
548         logger.debug("Invoking commit() ...")
549         database.connection.commit()
550
551         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
552         if config.get("bot_enabled") and len(blockdict) > 0:
553             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
554             network.send_bot_post(blocker, blockdict)
555
556     logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
557     if instances.has_pending(blocker):
558         logger.debug("Flushing updates for blocker='%s' ...", blocker)
559         instances.update_data(blocker)
560
561     logger.debug("Success! - EXIT!")
562     return 0
563
564 def fetch_cs(args: argparse.Namespace):
565     logger.debug("args[]='%s' - CALLED!", type(args))
566     extensions = [
567         "extra",
568         "abbr",
569         "attr_list",
570         "def_list",
571         "fenced_code",
572         "footnotes",
573         "md_in_html",
574         "admonition",
575         "codehilite",
576         "legacy_attrs",
577         "legacy_em",
578         "meta",
579         "nl2br",
580         "sane_lists",
581         "smarty",
582         "toc",
583         "wikilinks"
584     ]
585
586     domains = {
587         "silenced": list(),
588         "reject"  : list(),
589     }
590
591     raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
592     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
593
594     doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
595     logger.debug("doc()=%d[]='%s'", len(doc), type(doc))
596
597     silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
598     logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
599     domains["silenced"] = federation.find_domains(silenced)
600
601     blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
602     logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
603     domains["reject"] = federation.find_domains(blocked)
604
605     blocking = blocklist["silenced"] + blocklist["reject"]
606     blocker = "chaos.social"
607
608     logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
609     instances.set_total_blocks(blocker, blocking)
610
611     logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
612     blockdict = list()
613     if len(domains) > 0:
614         locking.acquire()
615
616         for block_level in domains:
617             logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))
618
619             for row in domains[block_level]:
620                 logger.debug("row[%s]='%s'", type(row), row)
621                 if instances.is_recent(row["domain"], "last_blocked"):
622                     logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
623                     continue
624                 elif not instances.is_registered(row["domain"]):
625                     try:
626                         logger.info("Fetching instances from domain='%s' ...", row["domain"])
627                         federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
628                     except network.exceptions as exception:
629                         logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
630                         instances.set_last_error(row["domain"], exception)
631
632                 if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
633                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
634                     blockdict.append({
635                         "blocked": row["domain"],
636                         "reason" : row["reason"],
637                     })
638
639         logger.debug("Invoking commit() ...")
640         database.connection.commit()
641
642         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
643         if config.get("bot_enabled") and len(blockdict) > 0:
644             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
645             network.send_bot_post(blocker, blockdict)
646
647     logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
648     if instances.has_pending(blocker):
649         logger.debug("Flushing updates for blocker='%s' ...", blocker)
650         instances.update_data(blocker)
651
652     logger.debug("Success! - EXIT!")
653     return 0
654
655 def fetch_fba_rss(args: argparse.Namespace) -> int:
656     logger.debug("args[]='%s' - CALLED!", type(args))
657     domains = list()
658
659     logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
660     response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
661
662     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
663     if response.ok and response.status_code < 300 and len(response.text) > 0:
664         logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
665         rss = atoma.parse_rss_bytes(response.content)
666
667         logger.debug("rss[]='%s'", type(rss))
668         for item in rss.items:
669             logger.debug("item='%s'", item)
670             domain = tidyup.domain(item.link.split("=")[1])
671
672             logger.debug("domain='%s' - AFTER!", domain)
673             if domain == "":
674                 logger.debug("domain is empty - SKIPPED!")
675                 continue
676             elif not utils.is_domain_wanted(domain):
677                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
678                 continue
679             elif domain in domains:
680                 logger.debug("domain='%s' is already added - SKIPPED!", domain)
681                 continue
682             elif instances.is_registered(domain):
683                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
684                 continue
685             elif instances.is_recent(domain):
686                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
687                 continue
688
689             logger.debug("Adding domain='%s'", domain)
690             domains.append(domain)
691
692     logger.debug("domains()=%d", len(domains))
693     if len(domains) > 0:
694         locking.acquire()
695
696         logger.info("Adding %d new instances ...", len(domains))
697         for domain in domains:
698             try:
699                 logger.info("Fetching instances from domain='%s' ...", domain)
700                 federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
701             except network.exceptions as exception:
702                 logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
703                 instances.set_last_error(domain, exception)
704                 return 100
705
706     logger.debug("Success! - EXIT!")
707     return 0
708
709 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
710     logger.debug("args[]='%s' - CALLED!", type(args))
711     feed = "https://ryona.agency/users/fba/feed.atom"
712
713     domains = list()
714
715     logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
716     response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
717
718     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
719     if response.ok and response.status_code < 300 and len(response.text) > 0:
720         logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
721         atom = atoma.parse_atom_bytes(response.content)
722
723         logger.debug("atom[]='%s'", type(atom))
724         for entry in atom.entries:
725             logger.debug("entry[]='%s'", type(entry))
726             doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
727             logger.debug("doc[]='%s'", type(doc))
728             for element in doc.findAll("a"):
729                 logger.debug("element[]='%s'", type(element))
730                 for href in element["href"].split(","):
731                     logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
732                     domain = tidyup.domain(href)
733
734                     logger.debug("domain='%s' - AFTER!", domain)
735                     if domain == "":
736                         logger.debug("domain is empty - SKIPPED!")
737                         continue
738                     elif not utils.is_domain_wanted(domain):
739                         logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
740                         continue
741                     elif domain in domains:
742                         logger.debug("domain='%s' is already added - SKIPPED!", domain)
743                         continue
744                     elif instances.is_registered(domain):
745                         logger.debug("domain='%s' is already registered - SKIPPED!", domain)
746                         continue
747                     elif instances.is_recent(domain):
748                         logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
749                         continue
750
751                     logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
752                     domains.append(domain)
753
754     logger.debug("domains()=%d", len(domains))
755     if len(domains) > 0:
756         locking.acquire()
757
758         logger.info("Adding %d new instances ...", len(domains))
759         for domain in domains:
760             logger.debug("domain='%s'", domain)
761             try:
762                 logger.info("Fetching instances from domain='%s' ...", domain)
763                 federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
764             except network.exceptions as exception:
765                 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
766                 instances.set_last_error(domain, exception)
767                 return 100
768
769     logger.debug("Success! - EXIT!")
770     return 0
771
772 def fetch_instances(args: argparse.Namespace) -> int:
773     logger.debug("args[]='%s' - CALLED!", type(args))
774     locking.acquire()
775
776     # Initial fetch
777     try:
778         logger.info("Fetching instances from args.domain='%s' ...", args.domain)
779         federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
780     except network.exceptions as exception:
781         logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
782         instances.set_last_error(args.domain, exception)
783         instances.update_data(args.domain)
784         return 100
785
786     if args.single:
787         logger.debug("Not fetching more instances - EXIT!")
788         return 0
789
790     # Loop through some instances
791     database.cursor.execute(
792         "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
793     )
794
795     rows = database.cursor.fetchall()
796     logger.info("Checking %d entries ...", len(rows))
797     for row in rows:
798         logger.debug("row[domain]='%s'", row["domain"])
799         if row["domain"] == "":
800             logger.debug("row[domain] is empty - SKIPPED!")
801             continue
802         elif not utils.is_domain_wanted(row["domain"]):
803             logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
804             continue
805
806         try:
807             logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
808             federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
809         except network.exceptions as exception:
810             logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
811             instances.set_last_error(row["domain"], exception)
812
813     logger.debug("Success - EXIT!")
814     return 0
815
816 def fetch_oliphant(args: argparse.Namespace) -> int:
817     logger.debug("args[]='%s' - CALLED!", type(args))
818     locking.acquire()
819
820     # Base URL
821     base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
822
823     # URLs to fetch
824     blocklists = (
825         {
826             "blocker": "artisan.chat",
827             "csv_url": "mastodon/artisan.chat.csv",
828         },{
829             "blocker": "mastodon.art",
830             "csv_url": "mastodon/mastodon.art.csv",
831         },{
832             "blocker": "pleroma.envs.net",
833             "csv_url": "mastodon/pleroma.envs.net.csv",
834         },{
835             "blocker": "oliphant.social",
836             "csv_url": "mastodon/_unified_tier3_blocklist.csv",
837         },{
838             "blocker": "mastodon.online",
839             "csv_url": "mastodon/mastodon.online.csv",
840         },{
841             "blocker": "mastodon.social",
842             "csv_url": "mastodon/mastodon.social.csv",
843         },{
844             "blocker": "mastodon.social",
845             "csv_url": "other/missing-tier0-mastodon.social.csv",
846         },{
847             "blocker": "rage.love",
848             "csv_url": "mastodon/rage.love.csv",
849         },{
850             "blocker": "sunny.garden",
851             "csv_url": "mastodon/sunny.garden.csv",
852         },{
853             "blocker": "solarpunk.moe",
854             "csv_url": "mastodon/solarpunk.moe.csv",
855         },{
856             "blocker": "toot.wales",
857             "csv_url": "mastodon/toot.wales.csv",
858         },{
859             "blocker": "union.place",
860             "csv_url": "mastodon/union.place.csv",
861         }
862     )
863
864     domains = list()
865
866     logger.debug("Downloading %d files ...", len(blocklists))
867     for block in blocklists:
868         # Is domain given and not equal blocker?
869         if isinstance(args.domain, str) and args.domain != block["blocker"]:
870             logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
871             continue
872         elif args.domain in domains:
873             logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
874             continue
875         elif instances.is_recent(block["blocker"]):
876             logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
877             continue
878
879         # Fetch this URL
880         logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
881         response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
882
883         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
884         if not response.ok or response.status_code > 399 or response.content == "":
885             logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
886             continue
887
888         logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
889         reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
890
891         blockdict = list()
892
893         logger.info("Processing %d rows ...", len(reader))
894         cnt = 0
895         for row in reader:
896             logger.debug("row[%s]='%s'", type(row), row)
897             domain = severity = None
898             reject_media = reject_reports = False
899
900             if "#domain" in row:
901                 domain = row["#domain"]
902             elif "domain" in row:
903                 domain = row["domain"]
904             else:
905                 logger.debug("row='%s' does not contain domain column", row)
906                 continue
907
908             if "#severity" in row:
909                 severity = row["#severity"]
910             elif "severity" in row:
911                 severity = row["severity"]
912             else:
913                 logger.debug("row='%s' does not contain severity column", row)
914                 continue
915
916             if "#reject_media" in row and row["#reject_media"].lower() == "true":
917                 reject_media = True
918             elif "reject_media" in row and row["reject_media"].lower() == "true":
919                 reject_media = True
920
921             if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
922                 reject_reports = True
923             elif "reject_reports" in row and row["reject_reports"].lower() == "true":
924                 reject_reports = True
925
926             cnt = cnt + 1
927             logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
928             if domain == "":
929                 logger.debug("domain is empty - SKIPPED!")
930                 continue
931             elif not utils.is_domain_wanted(domain):
932                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
933                 continue
934
935             logger.debug("Marking domain='%s' as handled", domain)
936             domains.append(domain)
937
938             logger.debug("Processing domain='%s' ...", domain)
939             processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
940             logger.debug("processed='%s'", processed)
941
942             if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
943                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
944                 blockdict.append({
945                     "blocked": domain,
946                     "reason" : block["reason"],
947                 })
948
949             if reject_media:
950                 utils.process_block(block["blocker"], domain, None, "reject_media")
951             if reject_reports:
952                 utils.process_block(block["blocker"], domain, None, "reject_reports")
953
954         logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", block["blocker"], cnt)
955         instances.set_total_blocks(block["blocker"], cnt)
956
957         logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
958         if instances.has_pending(blocker):
959             logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
960             instances.update_data(block["blocker"])
961
962         logger.debug("Invoking commit() ...")
963         database.connection.commit()
964
965         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
966         if config.get("bot_enabled") and len(blockdict) > 0:
967             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
968             network.send_bot_post(block["blocker"], blockdict)
969
970     logger.debug("Success! - EXIT!")
971     return 0
972
973 def fetch_txt(args: argparse.Namespace) -> int:
974     logger.debug("args[]='%s' - CALLED!", type(args))
975     locking.acquire()
976
977     # Static URLs
978     urls = ({
979         "blocker": "seirdy.one",
980         "url"    : "https://seirdy.one/pb/bsl.txt",
981     },)
982
983     logger.info("Checking %d text file(s) ...", len(urls))
984     for row in urls:
985         logger.debug("Fetching row[url]='%s' ...", row["url"])
986         response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
987
988         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
989         if response.ok and response.status_code < 300 and response.text != "":
990             logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
991             domains = response.text.split("\n")
992
993             logger.info("Processing %d domains ...", len(domains))
994             for domain in domains:
995                 logger.debug("domain='%s' - BEFORE!", domain)
996                 domain = tidyup.domain(domain)
997
998                 logger.debug("domain='%s' - AFTER!", domain)
999                 if domain == "":
1000                     logger.debug("domain is empty - SKIPPED!")
1001                     continue
1002                 elif not utils.is_domain_wanted(domain):
1003                     logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1004                     continue
1005                 elif instances.is_recent(domain):
1006                     logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1007                     continue
1008
1009                 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1010                 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1011
1012                 logger.debug("processed='%s'", processed)
1013                 if not processed:
1014                     logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1015                     continue
1016
1017     logger.debug("Success! - EXIT!")
1018     return 0
1019
1020 def fetch_fedipact(args: argparse.Namespace) -> int:
1021     logger.debug("args[]='%s' - CALLED!", type(args))
1022     locking.acquire()
1023
1024     response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1025
1026     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1027     if response.ok and response.status_code < 300 and response.text != "":
1028         logger.debug("Parsing %d Bytes ...", len(response.text))
1029
1030         doc = bs4.BeautifulSoup(response.text, "html.parser")
1031         logger.debug("doc[]='%s'", type(doc))
1032
1033         rows = doc.findAll("li")
1034         logger.info("Checking %d row(s) ...", len(rows))
1035         for row in rows:
1036             logger.debug("row[]='%s'", type(row))
1037             domain = tidyup.domain(row.contents[0])
1038
1039             logger.debug("domain='%s' - AFTER!", domain)
1040             if domain == "":
1041                 logger.debug("domain is empty - SKIPPED!")
1042                 continue
1043             elif not utils.is_domain_wanted(domain):
1044                 logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1045                 continue
1046             elif instances.is_registered(domain):
1047                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1048                 continue
1049             elif instances.is_recent(domain):
1050                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1051                 continue
1052
1053             logger.info("Fetching domain='%s' ...", domain)
1054             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1055
1056     logger.debug("Success! - EXIT!")
1057     return 0
1058
1059 def fetch_joinfediverse(args: argparse.Namespace) -> int:
1060     logger.debug("args[]='%s' - CALLED!", type(args))
1061     locking.acquire()
1062
1063     raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
1064     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1065
1066     doc = bs4.BeautifulSoup(raw, "html.parser")
1067     logger.debug("doc[]='%s'", type(doc))
1068
1069     tables = doc.findAll("table", {"class": "wikitable"})
1070
1071     logger.info("Analyzing %d table(s) ...", len(tables))
1072     blocklist = list()
1073     for table in tables:
1074         logger.debug("table[]='%s'", type(table))
1075
1076         rows = table.findAll("tr")
1077         logger.info("Checking %d row(s) ...", len(rows))
1078         block_headers = dict()
1079         for row in rows:
1080             logger.debug("row[%s]='%s'", type(row), row)
1081
1082             headers = row.findAll("th")
1083             logger.debug("Found headers()=%d header(s)", len(headers))
1084             if len(headers) > 1:
1085                 block_headers = dict()
1086                 cnt = 0
1087                 for header in headers:
1088                     cnt = cnt + 1
1089                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1090                     text = header.contents[0]
1091
1092                     logger.debug("text[]='%s'", type(text))
1093                     if not isinstance(text, str):
1094                         logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1095                         continue
1096                     elif validators.domain(text.strip()):
1097                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1098                         continue
1099
1100                     text = tidyup.domain(text.strip())
1101                     logger.debug("text='%s'", text)
1102                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1103                         logger.debug("Found header: '%s'=%d", text, cnt)
1104                         block_headers[cnt] = text
1105
1106             elif len(block_headers) == 0:
1107                 logger.debug("row is not scrapable - SKIPPED!")
1108                 continue
1109             elif len(block_headers) > 0:
1110                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1111                 cnt = 0
1112                 block = dict()
1113
1114                 for element in row.find_all(["th", "td"]):
1115                     cnt = cnt + 1
1116                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1117                     if cnt in block_headers:
1118                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1119
1120                         text = element.text.strip()
1121                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1122
1123                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1124                         if key in ["domain", "instance"]:
1125                             block[key] = text
1126                         elif key == "reason":
1127                             block[key] = tidyup.reason(text)
1128                         elif key == "subdomain(s)":
1129                             block[key] = list()
1130                             if text != "":
1131                                 block[key] = text.split("/")
1132                         else:
1133                             logger.debug("key='%s'", key)
1134                             block[key] = text
1135
1136                 logger.debug("block()=%d ...", len(block))
1137                 if len(block) > 0:
1138                     logger.debug("Appending block()=%d ...", len(block))
1139                     blocklist.append(block)
1140
1141     logger.debug("blocklist()=%d", len(blocklist))
1142
1143     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1144     domains = database.cursor.fetchall()
1145
1146     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1147     blocking = list()
1148     for block in blocklist:
1149         logger.debug("block='%s'", block)
1150         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1151             origin = block["blocked"]
1152             for subdomain in block["subdomain(s)"]:
1153                 block["blocked"] = subdomain + "." + origin
1154                 blocking.append(block)
1155         else:
1156             blocking.append(block)
1157
1158     logger.debug("blocking()=%d", blocking)
1159     for block in blocking:
1160         logger.debug("block[]='%s'", type(block))
1161         block["blocked"] = tidyup.domain(block["blocked"])
1162
1163         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1164         if block["blocked"] == "":
1165             logger.debug("block[blocked] is empty - SKIPPED!")
1166             continue
1167         elif not utils.is_domain_wanted(block["blocked"]):
1168             logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1169             continue
1170         elif instances.is_recent(block["blocked"]):
1171             logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1172             continue
1173
1174         logger.info("Proccessing blocked='%s' ...", block["blocked"])
1175         utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1176
1177     blockdict = list()
1178     for blocker in domains:
1179         blocker = blocker[0]
1180         logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1181
1182         for block in blocking:
1183             logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
1184             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1185
1186             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1187             if block["blocked"] == "":
1188                 logger.debug("block[blocked] is empty - SKIPPED!")
1189                 continue
1190             elif not utils.is_domain_wanted(block["blocked"]):
1191                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1192                 continue
1193
1194             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1195             if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1196                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1197                 blockdict.append({
1198                     "blocked": block["blocked"],
1199                     "reason" : block["reason"],
1200                 })
1201
1202         if instances.has_pending(blocker):
1203             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1204             instances.update_data(blocker)
1205
1206         logger.debug("Invoking commit() ...")
1207         database.connection.commit()
1208
1209         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1210         if config.get("bot_enabled") and len(blockdict) > 0:
1211             logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1212             network.send_bot_post(blocker, blockdict)
1213
1214     logger.debug("Success! - EXIT!")
1215     return 0
1216
1217 def recheck_obfuscation(args: argparse.Namespace) -> int:
1218     logger.debug("args[]='%s' - CALLED!", type(args))
1219
1220     locking.acquire()
1221
1222     if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1223         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1224     elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1225         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1226     else:
1227         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1228
1229     rows = database.cursor.fetchall()
1230     logger.info("Checking %d domains ...", len(rows))
1231     for row in rows:
1232         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1233         if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1234             logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
1235             continue
1236
1237         blocking = list()
1238         if row["software"] == "pleroma":
1239             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1240             blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1241         elif row["software"] == "mastodon":
1242             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1243             blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1244         elif row["software"] == "lemmy":
1245             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1246             blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1247         elif row["software"] == "friendica":
1248             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1249             blocking = friendica.fetch_blocks(row["domain"])
1250         elif row["software"] == "misskey":
1251             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1252             blocking = misskey.fetch_blocks(row["domain"])
1253         else:
1254             logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
1255
1256         logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1257         instances.set_total_blocks(row["domain"], blocking)
1258
1259         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1260         obfuscated = 0
1261         blockdict = list()
1262         for block in blocking:
1263             logger.debug("block[blocked]='%s'", block["blocked"])
1264             blocked = None
1265
1266             if block["blocked"] == "":
1267                 logger.debug("block[blocked] is empty - SKIPPED!")
1268                 continue
1269             elif block["blocked"].endswith(".arpa"):
1270                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1271                 continue
1272             elif block["blocked"].endswith(".tld"):
1273                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1274                 continue
1275             elif block["blocked"].endswith(".onion"):
1276                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1277                 continue
1278             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1279                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1280                 obfuscated = obfuscated + 1
1281                 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1282             elif not utils.is_domain_wanted(block["blocked"]):
1283                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1284                 continue
1285             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1286                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1287                 continue
1288
1289             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1290             if blocked is not None and blocked != block["blocked"]:
1291                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1292                 obfuscated = obfuscated - 1
1293                 if blocks.is_instance_blocked(row["domain"], blocked):
1294                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1295                     continue
1296
1297                 block["block_level"] = utils.alias_block_level(block["block_level"])
1298
1299                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1300                 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1301                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1302                     blockdict.append({
1303                         "blocked": blocked,
1304                         "reason" : block["reason"],
1305                     })
1306
1307         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1308         if obfuscated == 0 and len(blocking) > 0:
1309             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1310             instances.set_has_obfuscation(row["domain"], False)
1311
1312         if instances.has_pending(row["domain"]):
1313             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1314             instances.update_data(row["domain"])
1315
1316         logger.debug("Invoking commit() ...")
1317         database.connection.commit()
1318
1319         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1320         if config.get("bot_enabled") and len(blockdict) > 0:
1321             logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
1322             network.send_bot_post(row["domain"], blockdict)
1323
1324     logger.debug("Success! - EXIT!")
1325     return 0
1326
1327 def fetch_fedilist(args: argparse.Namespace) -> int:
1328     logger.debug("args[]='%s' - CALLED!", type(args))
1329
1330     url = "http://demo.fedilist.com/instance/csv?onion=not"
1331     if args.software is not None and args.software != "":
1332         logger.debug("args.software='%s'", args.software)
1333         url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"
1334
1335     locking.acquire()
1336
1337     logger.info("Fetching url='%s' from fedilist.com ...", url)
1338     response = reqto.get(
1339         url,
1340         headers=network.web_headers,
1341         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1342         allow_redirects=False
1343     )
1344
1345     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1346     reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
1347
1348     logger.debug("reader[]='%s'", type(reader))
1349     blockdict = list()
1350     for row in reader:
1351         logger.debug("row[]='%s'", type(row))
1352         domain = tidyup.domain(row["hostname"])
1353         logger.debug("domain='%s' - AFTER!", domain)
1354
1355         if domain == "":
1356             logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1357             continue
1358         elif not utils.is_domain_wanted(domain):
1359             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1360             continue
1361         elif (args.all is None or not args.all) and instances.is_registered(domain):
1362             logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all))
1363             continue
1364         elif instances.is_recent(domain):
1365             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1366             continue
1367
1368         logger.info("Fetching instances from domain='%s' ...", domain)
1369         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1370
1371     logger.debug("Success! - EXIT!")
1372     return 0
1373
1374 def update_nodeinfo(args: argparse.Namespace) -> int:
1375     logger.debug("args[]='%s' - CALLED!", type(args))
1376
1377     if args.domain is not None and args.domain != "":
1378         logger.debug("Fetching args.domain='%s'", args.domain)
1379         database.cursor.execute("SELECT domain FROM instances WHERE domain = ?", [args.domain])
1380     elif args.software is not None and args.software != "":
1381         logger.info("Fetching domains for args.software='%s'", args.software)
1382         database.cursor.execute("SELECT domain FROM instances WHERE software = ?", [args.software])
1383     else:
1384         logger.info("Fetching domains for recently updated ...")
1385         database.cursor.execute("SELECT domain FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_block")])
1386
1387     domains = database.cursor.fetchall()
1388
1389     logger.info("Checking %d domain(s) ...", len(domains))
1390     for row in domains:
1391         logger.debug("row[]='%s'", type(row))
1392         try:
1393             logger.info("Updating nodeinfo for row[domain]='%s' ...", row["domain"])
1394             software = federation.determine_software(row["domain"])
1395
1396             logger.info("Determined software='%s'", software)
1397             instances.set_software(row["domain"], software)
1398             instances.set_success(row["domain"])
1399         except network.exceptions as exception:
1400             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1401             instances.set_last_error(row["domain"], exception)
1402
1403         instances.set_last_nodeinfo(row["domain"])
1404         instances.update_data(row["domain"])
1405
1406     logger.debug("Success! - EXIT!")
1407     return 0