# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
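    """ Checks whether args.domain can be added: returns 100 if the domain is
    syntactically invalid, 101 if blacklisted, 102 if already registered and
    0 if it is not yet known. """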
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
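    """ Sanity-checks stored nodeinfo URLs: counts instances whose absolute
    nodeinfo_url contains neither the domain nor its punycode form. Relative
    URLs always match and are skipped. """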
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s) with mismatching nodeinfo_url", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
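    """ Fetches the server list from pixelfed.org's API and registers any new,
    wanted domains through federation.fetch_instances(). """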
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    domain = "pixelfed.org"

    try:
        logger.debug("Checking CSRF from domain='%s' ...", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON from pixelfed.org API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
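    """ Queries the gql.api.bka.li GraphQL endpoint for a domain list and
    registers any new, wanted domains. """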
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
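    """ Fetches block lists from registered instances, optionally limited to a
    single domain (args.domain) or software (args.software), and records the
    blocks, deobfuscating masked entries where possible. """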
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
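    """ Scrapes fediverse.observer's per-software tables and registers any
    new, wanted domains. """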
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Acquire lock
    locking.acquire()

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            "https://fediverse.observer",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
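    """ Scrapes the todon.eu wiki block list (silenced/limited and suspended
    servers) and records the blocks for blocker 'todon.eu'. """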
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
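    """ Parses chaos.social's federation.md for silenced and blocked instances
    and records the blocks for blocker 'chaos.social'. """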
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    blocking = domains["silenced"] + domains["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_total_blocks(blocker, blocking)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    blockdict = list()
    if len(domains) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update_data(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
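    """ Parses an FBA-specific RSS feed (args.feed) and registers any new,
    wanted domains found in the item links. """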
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
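    """ Parses the FBA bot account's Atom feed on ryona.agency and registers
    any new, wanted domains linked from its entries. """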
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
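    """ Fetches the instance list from args.domain, then, unless --single is
    given, re-crawls known instances whose last fetch is older than the
    configured 'recheck_instance' interval. """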
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
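    """ Downloads the oliphant block list CSV files from Codeberg and records
    the listed blocks for each blocker. """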
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given that does not match this blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code >= 300 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        rows = list(csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix"))

        blockdict = list()

        logger.info("Processing %d rows ...", len(rows))
        cnt = 0
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = row["#severity"]
            elif "severity" in row:
                severity = row["severity"]
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            cnt = cnt + 1
            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s' for blocker='%s' ...", domain, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                utils.process_block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", block["blocker"], cnt)
        instances.set_total_blocks(block["blocker"], cnt)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update_data(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
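    """ Fetches static plain-text block lists (currently seirdy.one's bsl.txt)
    and processes each listed domain. """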
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
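    """ Scrapes the fedipact.online signatory list and registers any new,
    wanted domains. """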
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinfediverse(args: argparse.Namespace) -> int:
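    """ Scrapes the FediBlock tables on joinfediverse.wiki and records the
    blocks for the climatejustice.* instances found in the database. """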
1061     logger.debug("args[]='%s' - CALLED!", type(args))
1062     locking.acquire()
1063
1064     raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
1065     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1066
1067     doc = bs4.BeautifulSoup(raw, "html.parser")
1068     logger.debug("doc[]='%s'", type(doc))
1069
1070     tables = doc.findAll("table", {"class": "wikitable"})
1071
1072     logger.info("Analyzing %d table(s) ...", len(tables))
1073     blocklist = list()
1074     for table in tables:
1075         logger.debug("table[]='%s'", type(table))
1076
1077         rows = table.findAll("tr")
1078         logger.info("Checking %d row(s) ...", len(rows))
1079         block_headers = dict()
1080         for row in rows:
1081             logger.debug("row[%s]='%s'", type(row), row)
1082
1083             headers = row.findAll("th")
1084             logger.debug("Found headers()=%d header(s)", len(headers))
1085             if len(headers) > 1:
1086                 block_headers = dict()
1087                 cnt = 0
1088                 for header in headers:
1089                     cnt = cnt + 1
1090                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1091                     text = header.contents[0]
1092
1093                     logger.debug("text[]='%s'", type(text))
1094                     if not isinstance(text, str):
1095                         logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1096                         continue
1097                     elif validators.domain(text.strip()):
1098                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1099                         continue
1100
1101                     text = tidyup.domain(text.strip())
1102                     logger.debug("text='%s'", text)
1103                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1104                         logger.debug("Found header: '%s'=%d", text, cnt)
1105                         block_headers[cnt] = text
1106
1107             elif len(block_headers) == 0:
1108                 logger.debug("row is not scrapable - SKIPPED!")
1109                 continue
1110             elif len(block_headers) > 0:
1111                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1112                 cnt = 0
1113                 block = dict()
1114
1115                 for element in row.find_all(["th", "td"]):
1116                     cnt = cnt + 1
1117                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1118                     if cnt in block_headers:
1119                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1120
1121                         text = element.text.strip()
1122                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1123
1124                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1125                         if key in ["domain", "instance"]:
1126                             block[key] = text
1127                         elif key == "reason":
1128                             block[key] = tidyup.reason(text)
1129                         elif key == "subdomain(s)":
1130                             block[key] = list()
1131                             if text != "":
1132                                 block[key] = text.split("/")
1133                         else:
1134                             logger.debug("key='%s'", key)
1135                             block[key] = text
1136
1137                 logger.debug("block()=%d ...", len(block))
1138                 if len(block) > 0:
1139                     logger.debug("Appending block()=%d ...", len(block))
1140                     blocklist.append(block)
1141
1142     logger.debug("blocklist()=%d", len(blocklist))
1143
1144     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1145     domains = database.cursor.fetchall()
1146
1147     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1148     blocking = list()
1149     for block in blocklist:
1150         logger.debug("block='%s'", block)
1151         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1152             origin = block["blocked"]
1153             for subdomain in block["subdomain(s)"]:
1154                 block["blocked"] = subdomain + "." + origin
1155                 blocking.append(block)
1156         else:
1157             blocking.append(block)
1158
    logger.debug("blocking()=%d", len(blocking))
1160     for block in blocking:
1161         logger.debug("block[]='%s'", type(block))
1162         block["blocked"] = tidyup.domain(block["blocked"])
1163
1164         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1165         if block["blocked"] == "":
1166             logger.debug("block[blocked] is empty - SKIPPED!")
1167             continue
1168         elif not utils.is_domain_wanted(block["blocked"]):
1169             logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1170             continue
1171         elif instances.is_recent(block["blocked"]):
1172             logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1173             continue
1174
        logger.info("Processing blocked='%s' ...", block["blocked"])
1176         utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1177
    for blocker in domains:
        blocker = blocker[0]
        # Reset per blocker so a later bot POST does not re-send entries
        # already reported for an earlier blocker.
        blockdict = list()
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1182
1183         for block in blocking:
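            # Map the scraped "block reason(s)" column onto the "reason" key
            # expected by process_block().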
            logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block.get("reason"))
1185             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1186
1187             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1188             if block["blocked"] == "":
1189                 logger.debug("block[blocked] is empty - SKIPPED!")
1190                 continue
1191             elif not utils.is_domain_wanted(block["blocked"]):
1192                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1193                 continue
1194
1195             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1196             if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1198                 blockdict.append({
1199                     "blocked": block["blocked"],
1200                     "reason" : block["reason"],
1201                 })
1202
1203         if instances.has_pending(blocker):
1204             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1205             instances.update_data(blocker)
1206
1207         logger.debug("Invoking commit() ...")
1208         database.connection.commit()
1209
1210         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1211         if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1213             network.send_bot_post(blocker, blockdict)
1214
1215     logger.debug("Success! - EXIT!")
1216     return 0
1217
1218 def recheck_obfuscation(args: argparse.Namespace) -> int:
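    """
    Re-checks instances flagged with has_obfuscation = 1 and tries to
    deobfuscate wildcard entries (e.g. '*.example.com') in their block
    lists. Can be limited to a single --domain or one --software type;
    --all also re-checks recently checked instances.

    Hypothetical invocation (assuming the CLI wires this command up under
    the same name):

        ./fba.py recheck_obfuscation --software=mastodon
    """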
1219     logger.debug("args[]='%s' - CALLED!", type(args))
1220
1221     locking.acquire()
1222
1223     if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
1224         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "":
1226         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1227     else:
1228         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1229
1230     rows = database.cursor.fetchall()
1231     logger.info("Checking %d domains ...", len(rows))
1232     for row in rows:
1233         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1234         if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
1235             logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
1236             continue
1237
1238         blocking = list()
1239         if row["software"] == "pleroma":
1240             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1241             blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
1242         elif row["software"] == "mastodon":
1243             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1244             blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
1245         elif row["software"] == "lemmy":
1246             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1247             blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
1248         elif row["software"] == "friendica":
1249             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1250             blocking = friendica.fetch_blocks(row["domain"])
1251         elif row["software"] == "misskey":
1252             logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1253             blocking = misskey.fetch_blocks(row["domain"])
1254         else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1256
1257         logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1258         instances.set_total_blocks(row["domain"], blocking)
1259
1260         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1261         obfuscated = 0
1262         blockdict = list()
1263         for block in blocking:
1264             logger.debug("block[blocked]='%s'", block["blocked"])
1265             blocked = None
1266
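            # Skip empty, fake (.tld), reversed-IP (.arpa) and Tor (.onion)
            # entries; wildcard entries count as obfuscated and are handed
            # to deobfuscate_domain() below.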
1267             if block["blocked"] == "":
1268                 logger.debug("block[blocked] is empty - SKIPPED!")
1269                 continue
1270             elif block["blocked"].endswith(".arpa"):
1271                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1272                 continue
1273             elif block["blocked"].endswith(".tld"):
1274                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1275                 continue
1276             elif block["blocked"].endswith(".onion"):
1277                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1278                 continue
1279             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1280                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1281                 obfuscated = obfuscated + 1
1282                 blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1283             elif not utils.is_domain_wanted(block["blocked"]):
1284                 logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1285                 continue
1286             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1287                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1288                 continue
1289
1290             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1291             if blocked is not None and blocked != block["blocked"]:
1292                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1293                 obfuscated = obfuscated - 1
1294                 if blocks.is_instance_blocked(row["domain"], blocked):
1295                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1296                     continue
1297
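                # Normalise aliased block level names before recording the
                # deobfuscated entry.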
1298                 block["block_level"] = utils.alias_block_level(block["block_level"])
1299
1300                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1301                 if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], row["domain"])
1303                     blockdict.append({
1304                         "blocked": blocked,
1305                         "reason" : block["reason"],
1306                     })
1307
1308         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1309         if obfuscated == 0 and len(blocking) > 0:
1310             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1311             instances.set_has_obfuscation(row["domain"], False)
1312
1313         if instances.has_pending(row["domain"]):
1314             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1315             instances.update_data(row["domain"])
1316
1317         logger.debug("Invoking commit() ...")
1318         database.connection.commit()
1319
1320         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1321         if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1323             network.send_bot_post(row["domain"], blockdict)
1324
1325     logger.debug("Success! - EXIT!")
1326     return 0
1327
1328 def fetch_fedilist(args: argparse.Namespace) -> int:
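    """
    Fetches the instance list from demo.fedilist.com as CSV, optionally
    filtered by --software, and crawls any new, wanted domain found in it.
    --all also includes already registered domains.
    """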
1329     logger.debug("args[]='%s' - CALLED!", type(args))
1330
1331     url = "http://demo.fedilist.com/instance/csv?onion=not"
1332     if args.software is not None and args.software != "":
1333         logger.debug("args.software='%s'", args.software)
1334         url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"
1335
1336     locking.acquire()
1337
1338     logger.info("Fetching url='%s' from fedilist.com ...", url)
1339     response = reqto.get(
1340         url,
1341         headers=network.web_headers,
1342         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1343         allow_redirects=False
1344     )
1345
1346     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
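    # fedilist returns a CSV document; parse it with a DictReader so each
    # row can be addressed by column name (e.g. row["hostname"]).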
1347     reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
1348
1349     logger.debug("reader[]='%s'", type(reader))
1351     for row in reader:
1352         logger.debug("row[]='%s'", type(row))
1353         domain = tidyup.domain(row["hostname"])
1354         logger.debug("domain='%s' - AFTER!", domain)
1355
1356         if domain == "":
1357             logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
1358             continue
1359         elif not utils.is_domain_wanted(domain):
1360             logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
1361             continue
1362         elif (args.all is None or not args.all) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s' - SKIPPED!", domain, type(args.all))
1364             continue
1365         elif instances.is_recent(domain):
1366             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1367             continue
1368
1369         logger.info("Fetching instances from domain='%s' ...", domain)
1370         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1371
1372     logger.debug("Success! - EXIT!")
1373     return 0
1374
1375 def update_nodeinfo(args: argparse.Namespace) -> int:
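    """
    Re-determines the software type for instances whose nodeinfo data is
    older than the configured 'recheck_nodeinfo' interval, or for a given
    --domain or --software, and records the result per instance.
    """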
1376     logger.debug("args[]='%s' - CALLED!", type(args))
1377
1378     locking.acquire()
1379
1380     if args.domain is not None and args.domain != "":
1381         logger.debug("Fetching args.domain='%s'", args.domain)
1382         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1383     elif args.software is not None and args.software != "":
1384         logger.info("Fetching domains for args.software='%s'", args.software)
1385         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
1386     else:
        logger.info("Fetching domains with stale nodeinfo ...")
1388         database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1389
1390     domains = database.cursor.fetchall()
1391
1392     logger.info("Checking %d domain(s) ...", len(domains))
1393     for row in domains:
1394         logger.debug("row[]='%s'", type(row))
1395         try:
1396             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1397             software = federation.determine_software(row["domain"])
1398
1399             logger.debug("Determined software='%s'", software)
1400             if software != row["software"]:
1401                 logger.warning("Software type has changed from '%s' to '%s'!", row["software"], software)
1402                 instances.set_software(row["domain"], software)
1403
1404             instances.set_success(row["domain"])
1405         except network.exceptions as exception:
1406             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1407             instances.set_last_error(row["domain"], exception)
1408
1409         instances.set_last_nodeinfo(row["domain"])
1410         instances.update_data(row["domain"])
1411
1412     logger.debug("Success! - EXIT!")
1413     return 0