# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import csrf
from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import locking
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
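    """Checks whether a single domain (args.domain) is valid, not blacklisted
    and not yet registered. Returns 0 when the domain is unknown, otherwise a
    distinct status code (100-102) describing why it cannot be added."""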
    logger.debug("args.domain='%s' - CALLED!", args.domain)
    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
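    """Sanity-checks all stored nodeinfo URLs: counts instances whose
    nodeinfo_url matches neither their domain nor its punycode form.
    Relative URLs always match and are skipped."""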
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d mismatching row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
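    """Fetches the public server list from pixelfed.org's API and registers
    every previously unknown, wanted domain for crawling."""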
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token is sent by default; network.api_headers does not need to be added here
    headers = tuple()

    try:
        logger.debug("Checking CSRF from pixelfed.org")
        headers = csrf.determine("pixelfed.org", dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            "pixelfed.org",
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(row["domain"]):
                logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                continue
            elif instances.is_registered(row["domain"]):
                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
                continue
            elif instances.is_recent(row["domain"]):
                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                continue

            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch pixelfed.org API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
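    """Queries the GraphQL endpoint at gql.api.bka.li for its domain list and
    fetches instances from every new, wanted domain."""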
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()
    try:
        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
        }))

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows[data]()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(entry["domain"]):
                logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
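    """Fetches block lists from all supported instances (or from a single
    domain/software when given via args), deobfuscates wildcarded entries
    where possible and stores the resulting blocks in the database."""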
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for single args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
        blocker = tidyup.domain(blocker)
        logger.debug("blocker='%s' - AFTER!", blocker)

        if blocker == "":
            logger.warning("blocker is now empty!")
            continue
        elif nodeinfo_url is None or nodeinfo_url == "":
            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
            continue
        elif not utils.is_domain_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("blocker='%s'", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        blocking = list()
        if software == "pleroma":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
        elif software == "mastodon":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
        elif software == "lemmy":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
        elif software == "friendica":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = friendica.fetch_blocks(blocker)
        elif software == "misskey":
            logger.info("blocker='%s',software='%s'", blocker, software)
            blocking = misskey.fetch_blocks(blocker)
        else:
            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        blockdict = list()
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", blocker, block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without a hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some servers obscure domains with question marks; it is unclear whether this depends on the version
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = utils.alias_block_level(block["block_level"])

            if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
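    """Scrapes fediverse.observer for the list of known software types (or
    uses args.software) and fetches instances for every new, wanted domain
    found in the per-software tables."""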
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Acquire lock
    locking.acquire()

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            "https://fediverse.observer",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has recently been handled - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
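    """Scrapes todon.eu's wiki page of domain blocks and records the
    silenced/limited and suspended entries as blocks by todon.eu."""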
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()
    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, 'todon.eu', None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if blocks.is_instance_blocked("todon.eu", blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='todon.eu' ...", blocked, block_level)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
            network.send_bot_post("todon.eu", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
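    """Parses chaos.social's federation.md (Markdown) and records the
    silenced and blocked instance tables as blocks by chaos.social."""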
    logger.debug("args[]='%s' - CALLED!", type(args))
    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    domains = {
        "silenced": list(),
        "reject"  : list(),
    }

    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    domains["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    domains["reject"] = federation.find_domains(blocked)

    logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
    blockdict = list()
    if len(domains) > 0:
        locking.acquire()

        for block_level in domains:
            logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))

            for row in domains[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if instances.is_recent(row["domain"], "last_blocked"):
                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], row["reason"])
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
            network.send_bot_post("chaos.social", blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
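    """Parses an FBA-specific RSS feed (args.feed) and fetches instances from
    every new, wanted domain found in the item links."""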
    logger.debug("args[]='%s' - CALLED!", type(args))
    domains = list()

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item='%s'", item)
            domain = tidyup.domain(item.link.split("=")[1])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
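    """Parses the Atom feed of the FBA bot account on ryona.agency and
    fetches instances from every new, wanted domain linked in the entries."""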
    logger.debug("args[]='%s' - CALLED!", type(args))
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href)

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain == "":
                        logger.debug("domain is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(domain):
                        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        locking.acquire()

        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
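    """Fetches instances from args.domain first, then (unless args.single is
    set) re-crawls all known instances whose last fetch is older than the
    configured recheck_instance interval."""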
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update_data(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(row["domain"]):
            logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
            instances.set_last_error(row["domain"], exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
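    """Downloads the CSV blocklists from oliphant's blocklists repository on
    Codeberg and records the listed domains as blocks by the respective
    blocker instances."""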
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Base URL
    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"

    # URLs to fetch
    blocklists = (
        {
            "blocker": "artisan.chat",
            "csv_url": "mastodon/artisan.chat.csv",
        },{
            "blocker": "mastodon.art",
            "csv_url": "mastodon/mastodon.art.csv",
        },{
            "blocker": "pleroma.envs.net",
            "csv_url": "mastodon/pleroma.envs.net.csv",
        },{
            "blocker": "oliphant.social",
            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
        },{
            "blocker": "mastodon.online",
            "csv_url": "mastodon/mastodon.online.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "mastodon/mastodon.social.csv",
        },{
            "blocker": "mastodon.social",
            "csv_url": "other/missing-tier0-mastodon.social.csv",
        },{
            "blocker": "rage.love",
            "csv_url": "mastodon/rage.love.csv",
        },{
            "blocker": "sunny.garden",
            "csv_url": "mastodon/sunny.garden.csv",
        },{
            "blocker": "solarpunk.moe",
            "csv_url": "mastodon/solarpunk.moe.csv",
        },{
            "blocker": "toot.wales",
            "csv_url": "mastodon/toot.wales.csv",
        },{
            "blocker": "union.place",
            "csv_url": "mastodon/union.place.csv",
        }
    )

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists))
    for block in blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue
        elif instances.is_recent(block["blocker"]):
            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
            continue

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if not response.ok or response.status_code > 399 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")

        logger.debug("reader[]='%s'", type(reader))
        blockdict = list()
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False
            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = row["#severity"]
            elif "severity" in row:
                severity = row["severity"]
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s' for blocker='%s' ...", domain, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                utils.process_block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                utils.process_block(block["blocker"], domain, None, "reject_reports")

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
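    """Fetches plain-text blocklists (currently seirdy.one's bsl.txt) and
    processes every listed domain for the respective blocker."""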
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain)

                logger.debug("domain='%s' - AFTER!", domain)
                if domain == "":
                    logger.debug("domain is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(domain):
                    logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
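    """Scrapes the instance list on fedipact.online and fetches instances
    from every new, wanted domain."""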
    logger.debug("args[]='%s' - CALLED!", type(args))
    locking.acquire()

    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code < 300 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0])

            logger.debug("domain='%s' - AFTER!", domain)
            if domain == "":
                logger.debug("domain is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(domain):
                logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_joinfediverse(args: argparse.Namespace) -> int:
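    """Scrapes the FediBlock wiki page on joinfediverse.wiki, expands listed
    subdomains, and records the entries as blocks by the climatejustice.*
    instances."""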
1025     logger.debug("args[]='%s' - CALLED!", type(args))
1026     locking.acquire()
1027
1028     raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
1029     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1030
1031     doc = bs4.BeautifulSoup(raw, "html.parser")
1032     logger.debug("doc[]='%s'", type(doc))
1033
1034     tables = doc.findAll("table", {"class": "wikitable"})
1035
1036     logger.info("Analyzing %d table(s) ...", len(tables))
1037     blocklist = list()
1038     for table in tables:
1039         logger.debug("table[]='%s'", type(table))
1040
1041         rows = table.findAll("tr")
1042         logger.info("Checking %d row(s) ...", len(rows))
1043         block_headers = dict()
1044         for row in rows:
1045             logger.debug("row[%s]='%s'", type(row), row)
1046
1047             headers = row.findAll("th")
1048             logger.debug("Found headers()=%d header(s)", len(headers))
1049             if len(headers) > 1:
1050                 block_headers = dict()
1051                 cnt = 0
1052                 for header in headers:
1053                     cnt = cnt + 1
1054                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1055                     text = header.contents[0]
1056
1057                     logger.debug("text[]='%s'", type(text))
1058                     if not isinstance(text, str):
1059                         logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
1060                         continue
1061                     elif validators.domain(text.strip()):
1062                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1063                         continue
1064
1065                     text = tidyup.domain(text.strip())
1066                     logger.debug("text='%s'", text)
1067                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1068                         logger.debug("Found header: '%s'=%d", text, cnt)
1069                         block_headers[cnt] = text
1070
1071             elif len(block_headers) == 0:
1072                 logger.debug("row is not scrapable - SKIPPED!")
1073                 continue
1074             elif len(block_headers) > 0:
1075                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1076                 cnt = 0
1077                 block = dict()
1078
1079                 for element in row.find_all(["th", "td"]):
1080                     cnt = cnt + 1
1081                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1082                     if cnt in block_headers:
1083                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1084
1085                         text = element.text.strip()
1086                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1087
1088                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1089                         if key in ["domain", "instance"]:
1090                             block[key] = text
1091                         elif key == "reason":
1092                             block[key] = tidyup.reason(text)
1093                         elif key == "subdomain(s)":
1094                             block[key] = list()
1095                             if text != "":
1096                                 block[key] = text.split("/")
1097                         else:
1098                             logger.debug("key='%s'", key)
1099                             block[key] = text
1100
1101                 logger.debug("block()=%d ...", len(block))
1102                 if len(block) > 0:
1103                     logger.debug("Appending block()=%d ...", len(block))
1104                     blocklist.append(block)
1105
1106     logger.debug("blocklist()=%d", len(blocklist))
1107
1108     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1109     domains = database.cursor.fetchall()
1110
1111     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1112     blocking = list()
1113     for block in blocklist:
1114         logger.debug("block='%s'", block)
1115         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1116             origin = block["blocked"]
1117             for subdomain in block["subdomain(s)"]:
1118                 block["blocked"] = subdomain + "." + origin
1119                 blocking.append(block)
1120         else:
1121             blocking.append(block)
1122
1123     logger.debug("blocking()=%d", blocking)
1124     for block in blocking:
1125         logger.debug("block[]='%s'", type(block))
1126         block["blocked"] = tidyup.domain(block["blocked"])
1127
1128         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1129         if block["blocked"] == "":
1130             logger.debug("block[blocked] is empty - SKIPPED!")
1131             continue
1132         elif not utils.is_domain_wanted(block["blocked"]):
1133             logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1134             continue
1135         elif instances.is_recent(block["blocked"]):
1136             logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
1137             continue
1138
1139         logger.info("Proccessing blocked='%s' ...", block["blocked"])
1140         utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1141
    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block.get("reason"))
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def recheck_obfuscation(args: argparse.Namespace) -> int:
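    """
    Re-checks block lists of instances flagged with has_obfuscation and tries
    to deobfuscate wildcarded entries. The scan can be narrowed to a single
    domain (args.domain) or to one software type (args.software).
    """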
    logger.debug("args[]='%s' - CALLED!", type(args))

    locking.acquire()

    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "":
        # Software names (e.g. "pleroma") are not domain names, so they must
        # not be run through the domain validator here.
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
            logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
            continue

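        # Each software exposes blocking data differently, so dispatch to the
        # matching network-specific fetcher.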
        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
        instances.set_total_blocks(row["domain"], blocking)

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        obfuscated = 0
        blockdict = list()
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a Tor onion domain name - SKIPPED!", block["blocked"])
                continue
            elif "*" in block["blocked"] or "?" in block["blocked"]:
                logger.debug("blocked='%s' is obfuscated", block["blocked"])
                obfuscated += 1
                blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block.get("hash"))
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

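            # A successful deobfuscation yields a different, concrete domain;
            # record it and stop counting the entry as obfuscated.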
            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                obfuscated -= 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = utils.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedilist(args: argparse.Namespace) -> int:
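    """
    Fetches the CSV instance export from fedilist.com, optionally filtered by
    software (args.software), and crawls every wanted domain found in it.
    """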
    logger.debug("args[]='%s' - CALLED!", type(args))

    url = "http://demo.fedilist.com/instance/csv?onion=not"
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"

    locking.acquire()

    logger.info("Fetching url='%s' from fedilist.com ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

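    # The export is plain CSV with one instance per row; only the "hostname"
    # column is used below.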
    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    for row in reader:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["hostname"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
            continue
        elif not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif (args.all is None or not args.all) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", domain, type(args.all))
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0