# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
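    """Checks whether args.domain is valid, not blacklisted and not yet
    registered. Returns 0 if the domain is unknown, 100 if it is invalid,
    101 if it is blacklisted and 102 if it is already registered."""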
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
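    """Checks all registered instances whose nodeinfo URL is neither relative
    nor contains the instance's own (punycode) domain, logging a warning for
    each mismatch. Always returns 0."""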
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt += 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
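    """Fetches the server list from pixelfed.org's API and registers every
    new, wanted domain. Returns 0 on success, 100 on CSRF failure and
    101-103 on API errors."""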
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token is used by default; network.source_headers does not need
    # to be added here manually
    headers = dict()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
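    """Fetches a domain list from the GraphQL API at gql.api.bka.li and
    registers every new, wanted domain. Returns 0 on success and 100-102 on
    errors."""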
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
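    """Fetches and stores blocklists from registered instances: either a
    single domain (args.domain), a single software type (args.software), all
    supported instances (args.force) or those whose block data is older than
    the configured 'recheck_block' interval. Returns 0 on success and
    100-102 on argument errors."""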
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:
        # Re-check all
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # chaos.social isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs or fetch_oliphant instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)

            # Map software names onto their software-specific blocklist fetchers
            fetchers = {
                "pleroma"  : pleroma.fetch_blocks,
                "mastodon" : mastodon.fetch_blocks,
                "lemmy"    : lemmy.fetch_blocks,
                "friendica": friendica.fetch_blocks,
                "misskey"  : misskey.fetch_blocks,
            }

            if software in fetchers:
                blocking = fetchers[software](blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", blocker, block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some obscure them with question marks; it is unclear whether that depends on the version
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
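    """Scrapes fediverse.observer for known instances, either for all listed
    software types or only for args.software, and registers every new,
    wanted domain. Returns 0 on success, 1 if the software menu cannot be
    found."""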
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
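    """Fetches the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks for todon.eu. Returns 0 unless
    the source was accessed recently."""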
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level, blocked_domains in blocklist.items():
        logger.debug("block_level='%s',blocked_domains()=%d", block_level, len(blocked_domains))
        for blocked in blocked_domains:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
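    """Fetches chaos.social's federation.md from raw.githubusercontent.com,
    parses the silenced and blocked instance tables and records them as
    blocks for chaos.social. Returns 0."""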
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d,doc[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], row["reason"], blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
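    """Fetches the FBA-specific RSS feed given via args.feed, extracts the
    domains from the item links and registers every new, wanted domain.
    Returns 0 on success, 100 on fetch errors."""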
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
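    """Fetches the ATOM feed of the FBA bot account (ryona.agency by default,
    or args.feed if given), extracts linked domains and registers every new,
    wanted domain. Returns 0 on success, 100 on fetch errors."""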
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href is not None and href != "" else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
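    """Fetches instances from args.domain and, unless args.single is set,
    also re-crawls known instances whose peer data is older than the
    configured 'recheck_instance' interval. Returns 0 on success and
    100-102 on errors."""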
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
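    """Downloads the CSV blocklists from oliphant's repository on
    codeberg.org and records the blocks per blocker, optionally restricted
    to args.domain. Returns 0."""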
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    domains = list()

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to this blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue
        elif args.domain in domains:
            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
            continue

        instances.set_last_blocked(block["blocker"])

        # Fetch this URL
        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
        if not response.ok or response.status_code > 200 or response.content == b"":
            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
            continue

        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

        blockdict = list()

        cnt = 0
        for row in reader:
            logger.debug("row[%s]='%s'", type(row), row)
            domain = severity = None
            reject_media = reject_reports = False

            if "#domain" in row:
                domain = row["#domain"]
            elif "domain" in row:
                domain = row["domain"]
            else:
                logger.debug("row='%s' does not contain domain column", row)
                continue

            if "#severity" in row:
                severity = blocks.alias_block_level(row["#severity"])
            elif "severity" in row:
                severity = blocks.alias_block_level(row["severity"])
            else:
                logger.debug("row='%s' does not contain severity column", row)
                continue

            if "#reject_media" in row and row["#reject_media"].lower() == "true":
                reject_media = True
            elif "reject_media" in row and row["reject_media"].lower() == "true":
                reject_media = True

            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
                reject_reports = True
            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
                reject_reports = True

            cnt += 1
            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty - SKIPPED!", domain)
                continue
            elif domain.endswith(".onion"):
                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
                continue
            elif domain.endswith(".arpa"):
                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
                continue
            elif domain.endswith(".tld"):
                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
                continue
            elif domain.find("*") >= 0 or domain.find("?") >= 0:
                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
                domain = utils.deobfuscate(domain, block["blocker"])
                logger.debug("domain='%s' - AFTER!", domain)

            if not validators.domain(domain):
                logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
                continue
            elif blacklist.is_blacklisted(domain):
                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
                continue
            elif blocks.is_instance_blocked(block["blocker"], domain, severity):
                logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
                continue

            logger.debug("Marking domain='%s' as handled", domain)
            domains.append(domain)

            logger.debug("Processing domain='%s' ...", domain)
            processed = processing.instance(domain, block["blocker"], inspect.currentframe().f_code.co_name)
            logger.debug("processed='%s'", processed)

            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',severity='%s' for blocker='%s' ...", domain, severity, block["blocker"])
                blockdict.append({
                    "blocked": domain,
                    "reason" : None,
                })

            if reject_media:
                processing.block(block["blocker"], domain, None, "reject_media")
            if reject_reports:
                processing.block(block["blocker"], domain, None, "reject_reports")

        logger.debug("block[blocker]='%s'", block["blocker"])
        if not blocklists.has(block["blocker"]):
            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
            instances.set_total_blocks(block["blocker"], domains)

        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
        if instances.has_pending(block["blocker"]):
            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
            instances.update(block["blocker"])

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
            network.send_bot_post(block["blocker"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
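    """Fetches plain-text blocklists from a static list of URLs (currently
    only seirdy.one's bsl.txt) and processes every listed, wanted domain.
    Returns 0."""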
1143     logger.debug("args[]='%s' - CALLED!", type(args))
1144
1145     logger.debug("Invoking locking.acquire() ...")
1146     locking.acquire()
1147
1148     # Static URLs
1149     urls = ({
1150         "blocker": "seirdy.one",
1151         "url"    : "https://seirdy.one/pb/bsl.txt",
1152     },)
1153
1154     logger.info("Checking %d text file(s) ...", len(urls))
1155     for row in urls:
1156         logger.debug("Fetching row[url]='%s' ...", row["url"])
1157         response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
1158
1159         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1160         if response.ok and response.status_code == 200 and response.text != "":
1161             logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
1162             domains = response.text.strip().split("\n")
1163
1164             logger.info("Processing %d domains ...", len(domains))
1165             for domain in domains:
1166                 logger.debug("domain='%s' - BEFORE!", domain)
1167                 domain = tidyup.domain(domain) if domain is not None and domain != "" else None
1168
1169                 logger.debug("domain='%s' - AFTER!", domain)
1170                 if domain is None or domain == "":
1171                     logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1172                     continue
1173                 elif not domain_helper.is_wanted(domain):
1174                     logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1175                     continue
1176                 elif instances.is_recent(domain):
1177                     logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1178                     continue
1179
1180                 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
1181                 processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)
1182
1183                 logger.debug("processed='%s'", processed)
1184                 if not processed:
1185                     logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
1186                     continue
1187
1188     logger.debug("Success! - EXIT!")
1189     return 0
1190
1191 def fetch_fedipact(args: argparse.Namespace) -> int:
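         """
         Scrapes the front page of fedipact.online, treating each <li>
         element as an instance domain. Wanted domains that are neither
         registered nor recently crawled are fetched through
         federation.fetch_instances(). Always returns 0.
         """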
1192     logger.debug("args[]='%s' - CALLED!", type(args))
1193
1194     logger.debug("Invoking locking.acquire() ...")
1195     locking.acquire()
1196
1197     source_domain = "fedipact.online"
1198     if sources.is_recent(source_domain):
1199         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1200         return 0
1201     else:
1202         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1203         sources.update(source_domain)
1204
1205     logger.info("Fetching / from source_domain='%s' ...", source_domain)
1206     response = utils.fetch_url(
1207         f"https://{source_domain}",
1208         network.web_headers,
1209         (config.get("connection_timeout"), config.get("read_timeout"))
1210     )
1211
1212     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1213     if response.ok and response.status_code == 200 and response.text != "":
1214         logger.debug("Parsing %d Bytes ...", len(response.text))
1215
1216         doc = bs4.BeautifulSoup(response.text, "html.parser")
1217         logger.debug("doc[]='%s'", type(doc))
1218
1219         rows = doc.findAll("li")
1220         logger.info("Checking %d row(s) ...", len(rows))
1221         for row in rows:
1222             logger.debug("row[]='%s'", type(row))
1223             domain = tidyup.domain(row.contents[0]) if row.contents[0] is not None and row.contents[0] != "" else None
1224
1225             logger.debug("domain='%s' - AFTER!", domain)
1226             if domain is None or domain == "":
1227                 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1228                 continue
1229
1230             logger.debug("domain='%s' - BEFORE!", domain)
1231             domain = domain.encode("idna").decode("utf-8")
1232             logger.debug("domain='%s' - AFTER!", domain)
1233
1234             if not domain_helper.is_wanted(domain):
1235                 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1236                 continue
1237             elif instances.is_registered(domain):
1238                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1239                 continue
1240             elif instances.is_recent(domain):
1241                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1242                 continue
1243
1244             logger.info("Fetching domain='%s' ...", domain)
1245             federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1246
1247     logger.debug("Success! - EXIT!")
1248     return 0
1249
1250 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
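         """
         Fetches the instance list from the JSON API on
         instances.joinmobilizon.org and crawls every wanted, unregistered
         host. Returns 0 on success or 1 when the response lacks the 'data'
         key.
         """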
1251     logger.debug("args[]='%s' - CALLED!", type(args))
1252
1253     logger.debug("Invoking locking.acquire() ...")
1254     locking.acquire()
1255
1256     source_domain = "instances.joinmobilizon.org"
1257     if sources.is_recent(source_domain):
1258         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1259         return 0
1260     else:
1261         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1262         sources.update(source_domain)
1263
1264     logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1265     raw = utils.fetch_url(
1266         f"https://{source_domain}/api/v1/instances",
1267         network.web_headers,
1268         (config.get("connection_timeout"), config.get("read_timeout"))
1269     ).text
1270     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1271
1272     parsed = json.loads(raw)
1273     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1274
1275     if "data" not in parsed:
1276         logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1277         return 1
1278
1279     logger.info("Checking %d instances ...", len(parsed["data"]))
1280     for row in parsed["data"]:
1281         logger.debug("row[]='%s'", type(row))
1282         if "host" not in row:
1283             logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1284             continue
1285         elif not domain_helper.is_wanted(row["host"]):
1286             logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1287             continue
1288         elif instances.is_registered(row["host"]):
1289             logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1290             continue
1291
1292         logger.info("Fetching row[host]='%s' ...", row["host"])
1293         federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1294
1295     logger.debug("Success! - EXIT!")
1296     return 0
1297
1298 def fetch_joinmisskey(args: argparse.Namespace) -> int:
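         """
         Downloads instances.json from instanceapp.misskey.page and crawls
         every wanted, unregistered instance URL. Returns 0 on success or 1
         when the response lacks the 'instancesInfos' key.
         """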
1299     logger.debug("args[]='%s' - CALLED!", type(args))
1300
1301     logger.debug("Invoking locking.acquire() ...")
1302     locking.acquire()
1303
1304     source_domain = "instanceapp.misskey.page"
1305     if sources.is_recent(source_domain):
1306         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1307         return 0
1308     else:
1309         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1310         sources.update(source_domain)
1311
1312     logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1313     raw = utils.fetch_url(
1314         f"https://{source_domain}/instances.json",
1315         network.web_headers,
1316         (config.get("connection_timeout"), config.get("read_timeout"))
1317     ).text
1318     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1319
1320     parsed = json.loads(raw)
1321     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1322
1323     if "instancesInfos" not in parsed:
1324         logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1325         return 1
1326
1327     logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1328     for row in parsed["instancesInfos"]:
1329         logger.debug("row[%s]='%s'", type(row), row)
1330         if "url" not in row:
1331             logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1332             continue
1333         elif not domain_helper.is_wanted(row["url"]):
1334             logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1335             continue
1336         elif instances.is_registered(row["url"]):
1337             logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1338             continue
1339
1340         logger.info("Fetching row[url]='%s' ...", row["url"])
1341         federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1342
1343     logger.debug("Success! - EXIT!")
1344     return 0
1345
1346 def fetch_joinfediverse(args: argparse.Namespace) -> int:
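         """
         Scrapes the FediBlock page on joinfediverse.wiki, parses its
         "wikitable" tables into block records (expanding listed
         subdomains), registers the blocked instances and records the
         blocks for all locally known climatejustice.* domains. Always
         returns 0.
         """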
1347     logger.debug("args[]='%s' - CALLED!", type(args))
1348
1349     logger.debug("Invoking locking.acquire() ...")
1350     locking.acquire()
1351
1352     source_domain = "joinfediverse.wiki"
1353     if sources.is_recent(source_domain):
1354         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1355         return 0
1356     else:
1357         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1358         sources.update(source_domain)
1359
1360     logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
1361     raw = utils.fetch_url(
1362         f"https://{source_domain}/FediBlock",
1363         network.web_headers,
1364         (config.get("connection_timeout"), config.get("read_timeout"))
1365     ).text
1366     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1367
1368     doc = bs4.BeautifulSoup(raw, "html.parser")
1369     logger.debug("doc[]='%s'", type(doc))
1370
1371     tables = doc.findAll("table", {"class": "wikitable"})
1372
1373     logger.info("Analyzing %d table(s) ...", len(tables))
1374     blocklist = list()
1375     for table in tables:
1376         logger.debug("table[]='%s'", type(table))
1377
1378         rows = table.findAll("tr")
1379         logger.info("Checking %d row(s) ...", len(rows))
1380         block_headers = dict()
1381         for row in rows:
1382             logger.debug("row[%s]='%s'", type(row), row)
1383
1384             headers = row.findAll("th")
1385             logger.debug("Found headers()=%d header(s)", len(headers))
1386             if len(headers) > 1:
1387                 block_headers = dict()
1388                 cnt = 0
1389                 for header in headers:
1390                     cnt = cnt + 1
1391                     logger.debug("header[]='%s',cnt=%d", type(header), cnt)
1392                     text = header.contents[0]
1393
1394                     logger.debug("text[]='%s'", type(text))
1395                     if not isinstance(text, str):
1396                         logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
1397                         continue
1398                     elif validators.domain(text.strip()):
1399                         logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
1400                         continue
1401
1402                     text = tidyup.domain(text.strip())
1403                     logger.debug("text='%s' - AFTER!", text)
1404                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
1405                         logger.debug("Found header: '%s'=%d", text, cnt)
1406                         block_headers[cnt] = text
1407
1408             elif len(block_headers) == 0:
1409                 logger.debug("row is not scrapable - SKIPPED!")
1410                 continue
1411             elif len(block_headers) > 0:
1412                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
1413                 cnt = 0
1414                 block = dict()
1415
1416                 for element in row.find_all(["th", "td"]):
1417                     cnt = cnt + 1
1418                     logger.debug("element[]='%s',cnt=%d", type(element), cnt)
1419                     if cnt in block_headers:
1420                         logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
1421
1422                         text = element.text.strip()
1423                         key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
1424
1425                         logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
1426                         if key == "blocked":
1427                             block[key] = text
1428                         elif key == "block reason(s)":
1429                             block[key] = tidyup.reason(text)
1430                         elif key == "subdomain(s)":
1431                             block[key] = list()
1432                             if text != "":
1433                                 block[key] = text.split("/")
1434                         else:
1435                             logger.debug("key='%s'", key)
1436                             block[key] = text
1437
1438                 logger.debug("block()=%d ...", len(block))
1439                 if len(block) > 0:
1440                     logger.debug("Appending block()=%d ...", len(block))
1441                     blocklist.append(block)
1442
1443     logger.debug("blocklist()=%d", len(blocklist))
1444
1445     database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
1446     domains = database.cursor.fetchall()
1447
1448     logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
1449     blocking = list()
1450     for block in blocklist:
1451         logger.debug("block='%s'", block)
1452         if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
1453             origin = block["blocked"]
1454             logger.debug("origin='%s'", origin)
1455             for subdomain in block["subdomain(s)"]:
1456                 entry = {**block, "blocked": subdomain + "." + origin}
1457                 logger.debug("entry[blocked]='%s'", entry["blocked"])
1458                 blocking.append(entry)
1459         else:
1460             blocking.append(block)
1461
1462     logger.debug("blocking()=%d", len(blocking))
1463     for block in blocking:
1464         logger.debug("block[]='%s'", type(block))
1465         if "blocked" not in block:
1466             raise KeyError(f"block()={len(block)} does not have element 'blocked'")
1467
1468         block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
1469         logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
1470
1471         if block["blocked"] == "":
1472             logger.debug("block[blocked] is empty - SKIPPED!")
1473             continue
1474         elif not domain_helper.is_wanted(block["blocked"]):
1475             logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
1476             continue
1477         elif instances.is_recent(block["blocked"]):
1478             logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
1479             continue
1480
1481         logger.debug("Processing blocked='%s' ...", block["blocked"])
1482         processing.instance(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
1483
1484     blockdict = list()
1485     for blocker in domains:
1486         blocker = blocker[0]
1487         logger.debug("blocker[%s]='%s'", type(blocker), blocker)
1488         instances.set_last_blocked(blocker)
1489
1490         for block in blocking:
1491             logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
1492             block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
1493
1494             logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
1495             if block["blocked"] == "":
1496                 logger.debug("block[blocked] is empty - SKIPPED!")
1497                 continue
1498             elif not domain_helper.is_wanted(block["blocked"]):
1499                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1500                 continue
1501
1502             logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
1503             if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
1504                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
1505                 blockdict.append({
1506                     "blocked": block["blocked"],
1507                     "reason" : block["reason"],
1508                 })
1509
1510         if instances.has_pending(blocker):
1511             logger.debug("Flushing updates for blocker='%s' ...", blocker)
1512             instances.update(blocker)
1513
1514         logger.debug("Invoking commit() ...")
1515         database.connection.commit()
1516
1517         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1518         if config.get("bot_enabled") and len(blockdict) > 0:
1519             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
1520             network.send_bot_post(blocker, blockdict)
1521
1522     logger.debug("Success! - EXIT!")
1523     return 0
1524
1525 def recheck_obfuscation(args: argparse.Namespace) -> int:
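         """
         Re-fetches the block lists of instances flagged with
         has_obfuscation = 1 (optionally narrowed down by --domain or
         --software), attempts to deobfuscate wildcarded entries and
         records every block that could be resolved to a real domain,
         updating the instance's obfuscated-blocks counter along the way.
         Always returns 0.
         """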
1526     logger.debug("args[]='%s' - CALLED!", type(args))
1527
1528     logger.debug("Invoking locking.acquire() ...")
1529     locking.acquire()
1530
1531     if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1532         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1533     elif isinstance(args.software, str) and args.software != "":
1534         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1535     else:
1536         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1537
1538     rows = database.cursor.fetchall()
1539     logger.info("Checking %d domains ...", len(rows))
1540     for row in rows:
1541         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1542         if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1543             logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1544             continue
1545
1546         logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1547         blocking = federation.fetch_blocks(row["domain"])
1548
1549         logger.debug("blocking()=%d", len(blocking))
1550         if len(blocking) == 0:
1551             if row["software"] == "pleroma":
1552                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1553                 blocking = pleroma.fetch_blocks(row["domain"])
1554             elif row["software"] == "mastodon":
1555                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1556                 blocking = mastodon.fetch_blocks(row["domain"])
1557             elif row["software"] == "lemmy":
1558                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1559                 blocking = lemmy.fetch_blocks(row["domain"])
1560             elif row["software"] == "friendica":
1561                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1562                 blocking = friendica.fetch_blocks(row["domain"])
1563             elif row["software"] == "misskey":
1564                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1565                 blocking = misskey.fetch_blocks(row["domain"])
1566             else:
1567                 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1568
1569         # chaos.social isn't part of oliphant's "hidden" blocklists
1570         logger.debug("row[domain]='%s'", row["domain"])
1571         if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1572             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1573             instances.set_last_blocked(row["domain"])
1574             instances.set_total_blocks(row["domain"], blocking)
1575
1576         obfuscated = 0
1577         blockdict = list()
1578
1579         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1580         for block in blocking:
1581             logger.debug("block[blocked]='%s'", block["blocked"])
1582             blocked = None
1583
1584             if block["blocked"] == "":
1585                 logger.debug("block[blocked] is empty - SKIPPED!")
1586                 continue
1587             elif block["blocked"].endswith(".arpa"):
1588                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1589                 continue
1590             elif block["blocked"].endswith(".tld"):
1591                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1592                 continue
1593             elif block["blocked"].endswith(".onion"):
1594                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1595                 continue
1596             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1597                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1598                 obfuscated = obfuscated + 1
1599                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1600             elif not domain_helper.is_wanted(block["blocked"]):
1601                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1602                 continue
1603             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1604                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1605                 continue
1606
1607             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1608             if blocked is not None and blocked != block["blocked"]:
1609                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1610                 obfuscated = obfuscated - 1
1611
1612                 if blacklist.is_blacklisted(blocked):
1613                     logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1614                     continue
1615                 elif blacklist.is_blacklisted(row["domain"]):
1616                     logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1617                     continue
1618                 elif blocks.is_instance_blocked(row["domain"], blocked):
1619                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1620                     continue
1621
1622                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1623
1624                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1625                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1626                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
1627                     blockdict.append({
1628                         "blocked": blocked,
1629                         "reason" : block["reason"],
1630                     })
1631
1632         logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1633         instances.set_obfuscated_blocks(row["domain"], obfuscated)
1634
1635         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1636         if obfuscated == 0 and len(blocking) > 0:
1637             logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
1638             instances.set_has_obfuscation(row["domain"], False)
1639
1640         if instances.has_pending(row["domain"]):
1641             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1642             instances.update(row["domain"])
1643
1644         logger.debug("Invoking commit() ...")
1645         database.connection.commit()
1646
1647         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1648         if config.get("bot_enabled") and len(blockdict) > 0:
1649             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1650             network.send_bot_post(row["domain"], blockdict)
1651
1652     logger.debug("Success! - EXIT!")
1653     return 0
1654
1655 def fetch_fedilist(args: argparse.Namespace) -> int:
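         """
         Downloads a CSV of instances from demo.fedilist.com (optionally
         filtered by --software) and crawls every wanted, not recently
         crawled domain from it. Returns 0 on success, 1 when the download
         fails and 2 when the CSV cannot be parsed.
         """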
1656     logger.debug("args[]='%s' - CALLED!", type(args))
1657
1658     logger.debug("Invoking locking.acquire() ...")
1659     locking.acquire()
1660
1661     source_domain = "demo.fedilist.com"
1662     if sources.is_recent(source_domain):
1663         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1664         return 0
1665     else:
1666         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1667         sources.update(source_domain)
1668
1669     url = f"http://{source_domain}/instance/csv?onion=not"
1670     if args.software is not None and args.software != "":
1671         logger.debug("args.software='%s'", args.software)
1672         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1673
1674     logger.info("Fetching url='%s' ...", url)
1675     response = reqto.get(
1676         url,
1677         headers=network.web_headers,
1678         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1679         allow_redirects=False
1680     )
1681
1682     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1683     if not response.ok or response.status_code > 200 or len(response.content) == 0:
1684         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1685         return 1
1686
1687     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1688
1689     logger.debug("reader[]='%s'", type(reader))
1690     if reader is None:
1691         logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1692         return 2
1693
1694     rows = list(reader)
1695
1696     logger.info("Checking %d rows ...", len(rows))
1697     for row in rows:
1698         logger.debug("row[]='%s'", type(row))
1699         if "hostname" not in row:
1700             logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1701             continue
1702
1703         logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1704         domain = tidyup.domain(row["hostname"]) if row["hostname"] is not None and row["hostname"] != "" else None
1705         logger.debug("domain='%s' - AFTER!", domain)
1706
1707         if domain is None or domain == "":
1708             logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1709             continue
1710
1711         logger.debug("domain='%s' - BEFORE!", domain)
1712         domain = domain.encode("idna").decode("utf-8")
1713         logger.debug("domain='%s' - AFTER!", domain)
1714
1715         if not domain_helper.is_wanted(domain):
1716             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1717             continue
1718         elif (args.force is None or not args.force) and instances.is_registered(domain):
1719             logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1720             continue
1721         elif instances.is_recent(domain):
1722             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1723             continue
1724
1725         logger.info("Fetching instances from domain='%s' ...", domain)
1726         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1727
1728     logger.debug("Success! - EXIT!")
1729     return 0
1730
1731 def update_nodeinfo(args: argparse.Namespace) -> int:
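         """
         Re-determines the software type of instances selected by --domain,
         --software, --mode or --no-software (defaulting to rows with stale
         or missing last_nodeinfo) and persists any detected change.
         Always returns 0.
         """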
1732     logger.debug("args[]='%s' - CALLED!", type(args))
1733
1734     logger.debug("Invoking locking.acquire() ...")
1735     locking.acquire()
1736
1737     if args.domain is not None and args.domain != "":
1738         logger.debug("Fetching args.domain='%s'", args.domain)
1739         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1740     elif args.software is not None and args.software != "":
1741         logger.info("Fetching domains for args.software='%s'", args.software)
1742         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.software.lower(), time.time() - config.get("recheck_nodeinfo")])
1743     elif args.mode is not None and args.mode != "":
1744         logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1745         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.mode.upper(), time.time() - config.get("recheck_nodeinfo")])
1746     elif args.no_software:
1747         logger.info("Fetching domains with no software type detected ...")
1748         database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
1749     else:
1750         logger.info("Fetching domains with outdated nodeinfo ...")
1751         database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1752
1753     domains = database.cursor.fetchall()
1754
1755     logger.info("Checking %d domain(s) ...", len(domains))
1756     cnt = 0
1757     for row in domains:
1758         logger.debug("row[]='%s'", type(row))
1759         if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1760             logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1761             continue
1762
1763         try:
1764             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1765             software = federation.determine_software(row["domain"])
1766
1767             logger.debug("Determined software='%s'", software)
1768             if (software != row["software"] and software is not None) or args.force is True:
1769                 logger.debug("software='%s'", software)
1770                 if software is None:
1771                     logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1772                     instances.set_nodeinfo_url(row["domain"], None)
1773
1774                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1775                 instances.set_software(row["domain"], software)
1776
1777             if software is not None:
1778                 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1779                 instances.set_success(row["domain"])
1780         except network.exceptions as exception:
1781             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1782             instances.set_last_error(row["domain"], exception)
1783
1784         instances.set_last_nodeinfo(row["domain"])
1785         instances.update(row["domain"])
1786         cnt = cnt + 1
1787
1788     logger.debug("Success! - EXIT!")
1789     return 0
1790
1791 def fetch_instances_social(args: argparse.Namespace) -> int:
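         """
         Queries the instances.social list API (requires
         'instances_social_api_key' to be set in config.json) and crawls
         every wanted, unregistered, not recently crawled instance.
         Returns 0 on success or a non-zero code on API errors.
         """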
1792     logger.debug("args[]='%s' - CALLED!", type(args))
1793
1794     logger.debug("Invoking locking.acquire() ...")
1795     locking.acquire()
1796
1797     source_domain = "instances.social"
1798
1799     if config.get("instances_social_api_key") == "":
1800         logger.error("API key not set. Please set in your config.json file.")
1801         return 1
1802     elif sources.is_recent(source_domain):
1803         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1804         return 0
1805     else:
1806         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1807         sources.update(source_domain)
1808
1809     headers = {
1810         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1811     }
1812
1813     logger.info("Fetching list from source_domain='%s' ...", source_domain)
1814     fetched = network.get_json_api(
1815         source_domain,
1816         "/api/1.0/instances/list?count=0&sort_by=name",
1817         headers,
1818         (config.get("connection_timeout"), config.get("read_timeout"))
1819     )
1820     logger.debug("fetched[]='%s'", type(fetched))
1821
1822     if "error_message" in fetched:
1823         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1824         return 2
1825     elif "exception" in fetched:
1826         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1827         return 3
1828     elif "json" not in fetched:
1829         logger.warning("fetched has no element 'json' - EXIT!")
1830         return 4
1831     elif "instances" not in fetched["json"]:
1832         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1833         return 5
1834
1835     domains = list()
1836     rows = fetched["json"]["instances"]
1837
1838     logger.info("Checking %d row(s) ...", len(rows))
1839     for row in rows:
1840         logger.debug("row[]='%s'", type(row))
1841         domain = tidyup.domain(row["name"]) if row["name"] is not None and row["name"] != "" else None
1842         logger.debug("domain='%s' - AFTER!", domain)
1843
1844         if domain is None or domain == "":
1845             logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1846             continue
1847
1848         logger.debug("domain='%s' - BEFORE!", domain)
1849         domain = domain.encode("idna").decode("utf-8")
1850         logger.debug("domain='%s' - AFTER!", domain)
1851
1852         if not domain_helper.is_wanted(domain):
1853             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1854             continue
1855         elif domain in domains:
1856             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1857             continue
1858         elif instances.is_registered(domain):
1859             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1860             continue
1861         elif instances.is_recent(domain):
1862             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1863             continue
1864
1865         logger.info("Fetching instances from domain='%s'", domain)
1866         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1867
1868     logger.debug("Success! - EXIT!")
1869     return 0
1870
1871 def fetch_relays(args: argparse.Namespace) -> int:
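         """
         Fetches peer lists from known relay software (activityrelay,
         aoderelay, selective-relay and pub-relay), either by scraping the
         relay's front page or, for pub-relay, by querying its nodeinfo
         URL. Stores the peer list per relay and afterwards crawls all
         newly discovered domains. Always returns 0.
         """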
1872     logger.debug("args[]='%s' - CALLED!", type(args))
1873
1874     logger.debug("Invoking locking.acquire() ...")
1875     locking.acquire()
1876
1877     if args.domain is not None and args.domain != "":
1878         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1879     elif args.software is not None and args.software != "":
1880         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
1881     else:
1882         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
1883
1884     domains = list()
1885     rows = database.cursor.fetchall()
1886
1887     logger.info("Checking %d relays ...", len(rows))
1888     for row in rows:
1889         logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1890         peers = list()
1891         if not args.force and instances.is_recent(row["domain"]):
1892             logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1893             continue
1894
1895         try:
1896             if row["software"] == "pub-relay":
1897                 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1898                 raw = network.fetch_api_url(
1899                     row["nodeinfo_url"],
1900                     (config.get("connection_timeout"), config.get("read_timeout"))
1901                 )
1902
1903                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1904                 if "exception" in raw:
1905                     logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1906                     raise raw["exception"]
1907                 elif "error_message" in raw:
1908                     logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1909                     instances.set_last_error(row["domain"], raw)
1910                     instances.set_last_instance_fetch(row["domain"])
1911                     instances.update(row["domain"])
1912                     continue
1913                 elif "json" not in raw:
1914                     logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1915                     continue
1916                 elif "metadata" not in raw["json"]:
1917                     logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
1918                     continue
1919                 elif "peers" not in raw["json"]["metadata"]:
1920                     logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
1921                     continue
1922             else:
1923                 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1924                 raw = utils.fetch_url(
1925                     f"https://{row['domain']}",
1926                     network.web_headers,
1927                     (config.get("connection_timeout"), config.get("read_timeout"))
1928                 ).text
1929                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1930
1931                 doc = bs4.BeautifulSoup(raw, features="html.parser")
1932                 logger.debug("doc[]='%s'", type(doc))
1933
1934         except network.exceptions as exception:
1935             logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1936             instances.set_last_error(row["domain"], exception)
1937             instances.set_last_instance_fetch(row["domain"])
1938             instances.update(row["domain"])
1939             continue
1940
1941         logger.debug("row[software]='%s'", row["software"])
1942         if row["software"] == "activityrelay":
1943             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1944             tags = doc.findAll("p")
1945
1946             logger.debug("Checking %d paragraphs ...", len(tags))
1947             for tag in tags:
1948                 logger.debug("tag[]='%s'", type(tag))
1949                 if len(tag.contents) == 0:
1950                     logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1951                     continue
1952                 elif "registered instances" not in tag.contents[0]:
1953                     logger.debug("Skipping paragraph, text not found.")
1954                     continue
1955
1956                 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1957                 for domain in tag.contents:
1958                     logger.debug("domain[%s]='%s'", type(domain), domain)
1959                     if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1960                         continue
1961
1962                     domain = str(domain)
1963                     logger.debug("domain='%s'", domain)
1964                     if not domain_helper.is_wanted(domain):
1965                         logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1966                         continue
1967
1968                     logger.debug("domain='%s' - BEFORE!", domain)
1969                     domain = tidyup.domain(domain) if domain is not None and domain != "" else None
1970                     logger.debug("domain='%s' - AFTER!", domain)
1971
1972                     if domain is None or domain == "":
1973                         logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1974                         continue
1975                     elif domain not in peers:
1976                         logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1977                         peers.append(domain)
1978
1979                     if dict_helper.has_key(domains, "domain", domain):
1980                         logger.debug("domain='%s' already added", domain)
1981                         continue
1982
1983                     logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1984                     domains.append({
1985                         "domain": domain,
1986                         "origin": row["domain"],
1987                     })
1988         elif row["software"] in ["aoderelay", "selective-relay"]:
1989             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1990             if row["software"] == "aoderelay":
1991                 tags = doc.findAll("section", {"class": "instance"})
1992             else:
1993                 tags = doc.find("div", {"id": "instances"}).findAll("li")
1994
1995             logger.debug("Checking %d tags ...", len(tags))
1996             for tag in tags:
1997                 logger.debug("tag[]='%s'", type(tag))
1998
1999                 link = tag.find("a")
2000                 logger.debug("link[%s]='%s'", type(link), link)
2001                 if link is None:
2002                     logger.warning("tag='%s' has no a-tag - SKIPPED!", tag)
2003                     continue
2004                 elif not link.has_attr("href"):
2005                     logger.warning("link()=%d has no key 'href' - SKIPPED!", len(link))
2006                     continue
2007
2008                 components = urlparse(link["href"])
2009                 domain = components.netloc.lower().split(":")[0]
2010
2011                 logger.debug("domain='%s' - BEFORE!", domain)
2012                 domain = tidyup.domain(domain) if domain is not None and domain != "" else None
2013                 logger.debug("domain='%s' - AFTER!", domain)
2014
2015                 if domain is None or domain == "":
2016                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
2017                     continue
2018                 elif domain not in peers:
2019                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
2020                     peers.append(domain)
2021
2022                 if dict_helper.has_key(domains, "domain", domain):
2023                     logger.debug("domain='%s' already added", domain)
2024                     continue
2025
2026                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
2027                 domains.append({
2028                     "domain": domain,
2029                     "origin": row["domain"],
2030                 })
2031         elif row["software"] == "pub-relay":
2032             logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
2033             for domain in raw["json"]["metadata"]["peers"]:
2034                 logger.debug("domain='%s' - BEFORE!", domain)
2035                 domain = tidyup.domain(domain) if domain is not None and domain != "" else None
2036                 logger.debug("domain='%s' - AFTER!", domain)
2037
2038                 if domain is None or domain == "":
2039                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
2040                     continue
2041                 elif domain not in peers:
2042                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
2043                     peers.append(domain)
2044
2045                 if dict_helper.has_key(domains, "domain", domain):
2046                     logger.debug("domain='%s' already added", domain)
2047                     continue
2048
2049                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
2050                 domains.append({
2051                     "domain": domain,
2052                     "origin": row["domain"],
2053                 })
2054         else:
2055             logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
2056             continue
2057
2058         logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
2059         instances.set_last_instance_fetch(row["domain"])
2060
2061         logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
2062         instances.set_total_peers(row["domain"], peers)
2063
2064         logger.debug("Flushing data for row[domain]='%s'", row["domain"])
2065         instances.update(row["domain"])
2066
2067     logger.info("Checking %d domains ...", len(domains))
2068     for row in domains:
2069         logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
2070         if not domain_helper.is_wanted(row["domain"]):
2071             logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
2072             continue
2073         elif instances.is_registered(row["domain"]):
2074             logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
2075             continue
2076
2077         logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
2078         federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
2079
2080     logger.debug("Success! - EXIT!")
2081     return 0
2082
2083 def convert_idna(args: argparse.Namespace) -> int:
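         """
         Feeds all values from the instances (domain, origin) and blocks
         (blocker, blocked) tables that are not yet in punycode to the
         respective translate_idnas() helper. Always returns 0.
         """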
2084     logger.debug("args[]='%s' - CALLED!", type(args))
2085
2086     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
2087     rows = database.cursor.fetchall()
2088
2089     logger.debug("rows[]='%s'", type(rows))
2090     instances.translate_idnas(rows, "domain")
2091
2092     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
2093     rows = database.cursor.fetchall()
2094
2095     logger.debug("rows[]='%s'", type(rows))
2096     instances.translate_idnas(rows, "origin")
2097
2098     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
2099     rows = database.cursor.fetchall()
2100
2101     logger.debug("rows[]='%s'", type(rows))
2102     blocks.translate_idnas(rows, "blocker")
2103
2104     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
2105     rows = database.cursor.fetchall()
2106
2107     logger.debug("rows[]='%s'", type(rows))
2108     blocks.translate_idnas(rows, "blocked")
2109
2110     logger.debug("Success! - EXIT!")
2111     return 0
2112
2113 def remove_invalid(args: argparse.Namespace) -> int:
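         """
         Removes instances whose domain name fails validation, including
         their block records, then commits and VACUUMs the database.
         Always returns 0.
         """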
2114     logger.debug("args[]='%s' - CALLED!", type(args))
2115
2116     logger.debug("Invoking locking.acquire() ...")
2117     locking.acquire()
2118
2119     database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
2120     rows = database.cursor.fetchall()
2121
2122     logger.info("Checking %d domains ...", len(rows))
2123     for row in rows:
2124         logger.debug("row[domain]='%s'", row["domain"])
2125         if not validators.domain(row["domain"].split("/")[0]):
2126             logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
2127             database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
2128             database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
2129
2130     logger.debug("Invoking commit() ...")
2131     database.connection.commit()
2132
2133     logger.info("Vacuum cleaning database ...")
2134     database.cursor.execute("VACUUM")
2135
2136     logger.debug("Success! - EXIT!")
2137     return 0