# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
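    """Check whether args.domain is valid, not blacklisted and not yet
    registered. Returns 0 when the domain is still unknown, 100 when it is
    invalid, 101 when it is blacklisted and 102 when it is already
    registered."""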
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
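    """Verify that each stored nodeinfo_url matches its instance's domain (or
    its punycode form); relative URLs always match. Returns 0."""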
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
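    """Fetch the public server list from the pixelfed.org API and fetch
    instances for each new, wanted domain. Returns 0 on success, a non-zero
    error code otherwise."""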
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF headers by default; network.api_headers does not need to be added manually here
    headers = tuple()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch pixelfed.org API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
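    """Fetch the domain list from the gql.api.bka.li GraphQL API and fetch
    instances for each new, wanted domain. Returns 0 on success."""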
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows[data]()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
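    """Fetch block lists, either for a single args.domain, for all instances
    running args.software, or for every supported instance whose last check
    is older than the 'recheck_block' interval. Deobfuscates obscured entries
    where possible and records new blocks. Returns 0 on success."""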
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
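    """Scrape fediverse.observer, either for all listed software types or only
    args.software, and fetch instances for every new, wanted domain."""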
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Acquire lock
    locking.acquire()

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            "https://fediverse.observer",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        # Alias the software name once per type, not once per item, so the loop variable stays stable
        software = software_helper.alias(software)
        logger.debug("software='%s' - AFTER!", software)

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
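    """Scrape the silenced/limited and suspended server lists from
    wiki.todon.eu and record them as blocks by todon.eu."""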
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, "todon.eu", None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='todon.eu' ...", blocked, block_level)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
            network.send_bot_post("todon.eu", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
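    """Parse chaos.social's federation.md (rendered from Markdown) and record
    the silenced and blocked instances as blocks by chaos.social."""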
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    blockdict = list()
    if len(domains["silenced"]) > 0 or len(domains["reject"]) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='chaos.social' ...", row["domain"], block_level)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
            network.send_bot_post("chaos.social", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
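    """Parse the FBA-specific RSS feed given as args.feed and fetch instances
    for every new, wanted domain found in it."""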
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
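    """Parse the FBA bot account's ATOM feed on ryona.agency and fetch
    instances for every new, wanted domain linked from its entries."""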
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
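    """Fetch instances starting from args.domain. Unless args.single is set,
    also re-fetch known instances whose last fetch is older than the
    'recheck_instance' interval."""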
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
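    """Download the CSV blocklists from oliphant's blocklists repository on
    codeberg.org and record the entries as blocks by the respective
    blocker."""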
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if not response.ok or response.status_code > 399 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

        logger.debug("reader[]='%s'", type(reader))
        blockdict = list()
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False
            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = row["#severity"]
            elif "severity" in row:
                severity = row["severity"]
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s' for blocker='%s' ...", domain, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                utils.process_block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
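    """Download static text-file blocklists (currently seirdy.one's bsl.txt)
    and process every listed domain."""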
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
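    """Scrape the list of pledged instances from fedipact.online and fetch
    instances for every new, wanted domain."""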
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinfediverse(args: argparse.Namespace) -> int:
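    """Scrape the FediBlock tables on joinfediverse.wiki and record the
    entries as blocks by the climatejustice.* instances known to the
    database."""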
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s'", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key == "blocked":
                            block[key] = text
                        elif key == "block reason(s)":
                            block[key] = tidyup.reason(text)
                        elif key == "subdomain(s)":
                            block[key] = list()
                            if text != "":
                                block[key] = text.split("/")
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            for subdomain in block["subdomain(s)"]:
                block["blocked"] = subdomain + "." + origin
                blocking.append(block)
        else:
            blocking.append(block)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        block["blocked"] = tidyup.domain(block["blocked"])

        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(block["blocked"]):
            logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Processing blocked='%s' ...", block["blocked"])
        utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1157             if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1158                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
1159                 blockdict.append({
1160                     "blocked": block["blocked"],
1161                     "reason" : block["reason"],
1162                 })
1163
1164         if instances.has_pending(blocker):
1165             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1166             instances.update_data(blocker)
1167
1168         logger.debug("Invoking commit() ...")
1169         database.connection.commit()
1170
1171         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1172         if config.get("bot_enabled") and len(blockdict) > 0:
1173             logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
1174             network.send_bot_post(blocker, blockdict)
1175
1176     logger.debug("Success! - EXIT!")
1177     return 0
1178
def recheck_obfuscation(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

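    # Re-check only instances previously flagged as having obfuscated block
    # lists, optionally narrowed down to a single domain or software type.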
    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])

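        # Delegate fetching the block list to the network-specific module;
        # Pleroma, Mastodon and Lemmy need the nodeinfo URL, while Friendica
        # and Misskey are queried by domain alone.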
        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

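        # Count entries that still contain wildcards ("*" or "?"). Every entry
        # that can be deobfuscated is subtracted again further down, so a final
        # count of zero means the list has been fully resolved.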
        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        obfuscated = 0
        blockdict = list()
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif "*" in block["blocked"] or "?" in block["blocked"]:
                logger.debug("blocked='%s' is obfuscated.", block["blocked"])
                obfuscated += 1
                blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block.get("hash"))
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

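            # If deobfuscation yielded a different, concrete domain, count the
            # entry as resolved and record the block under its real name.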
            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated -= 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = utils.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

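        # A zero count with a non-empty list means every obfuscated entry could
        # be resolved, so the flag on this instance can be cleared.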
        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedilist(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    url = "http://demo.fedilist.com/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"

    locking.acquire()

    logger.info("Fetching url='%s' from fedilist.com ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
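    # Bail out early on redirects, HTTP errors or an empty body instead of
    # feeding an unusable response into the CSV parser below.
    if not response.ok or response.status_code >= 300 or response.text == "":
        logger.warning("Failed fetching url='%s': response.status_code=%d - EXIT!", url, response.status_code)
        return 1

    # fedilist's CSV export carries one row per instance; the "hostname"
    # column is expected to hold the bare domain name.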
    reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    for row in reader:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["hostname"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
            continue
        elif not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0