# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
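    """
    Checks whether args.domain is a valid, non-blacklisted and not yet
    registered domain. Returns 0 when the domain is still unknown, or a
    status code (100-102) naming the failed check.
    """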
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
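    """
    Cross-checks all recorded nodeinfo URLs against their instance's domain
    (including its punycode form) and reports rows where the URL points at
    a different host. Always returns 0.
    """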
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
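    """
    Fetches the public server list from the pixelfed.org API and registers
    every new, wanted instance found there. Returns 0 on success, 1 when
    the API was queried too recently, or 100-103 on errors.
    """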
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token is used by default; network.source_headers does not have to be added here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
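    """
    Queries the GraphQL API at gql.api.bka.li for a list of known domains
    and fetches instance data for each new, wanted one. Returns 0 on
    success, 1 when the API was queried too recently, or 100-102 on errors.
    """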
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
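    """
    Fetches and stores blocklists from registered instances. The set of
    checked blockers is narrowed by args.domain, args.software or
    args.force; otherwise only instances whose last check is older than
    the configured 'recheck_block' interval are re-checked. Returns 0 on
    success or 100-102 when args.domain fails validation.
    """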
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./fba.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:
        # Re-check all
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # chaos.social isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant or fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)

                # Some servers obscure domains with question marks; it is unclear whether that depends on the software version
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    instances.set_has_obfuscation(blocker, True)
                    continue

                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
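    """
    Crawls fediverse.observer for known instances, either for all software
    types found in its navigation bar or only for args.software, and
    registers every new, wanted domain. Returns 0 on success.
    """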
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
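    """
    Fetches the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks for todon.eu. Returns 0 on
    success or 1 when the source was queried too recently.
    """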
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
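    """
    Fetches chaos.social's federation.md from raw.githubusercontent.com,
    parses the silenced and blocked instance tables and records them as
    blocks for chaos.social. Returns 0 on success or 1 when the source
    was queried too recently.
    """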
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d,doc[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
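    """
    Parses an FBA-specific RSS feed given as args.feed and registers every
    new, wanted domain found in the item links. Returns 0 on success or
    100 when fetching an instance fails.
    """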
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS feed args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
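    """
    Parses the ATOM feed of an FBA bot account (ryona.agency by default,
    or args.feed when given) and registers every new, wanted domain linked
    in the entries. Returns 0 on success, 1 when the source was queried
    too recently, or 100 when fetching an instance fails.
    """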
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href is not None and href != "" else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
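    """
    Fetches instance data for args.domain and then, unless args.single is
    set, re-crawls known instances whose last fetch is older than the
    configured 'recheck_instance' interval. Returns 0 on success or
    100-102 on validation or fetch errors.
    """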
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    # Initial fetch
    try:
        logger.info("Fetching instances from domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_csv(args: argparse.Namespace) -> int:
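    """
    Processes all CSV-based blocklists registered in blocklists.csv_files,
    optionally restricted to the blocker matching args.domain. Always
    returns 0.
    """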
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
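    """
    Downloads oliphant's CSV blocklists from codeberg.org and processes
    them, optionally restricted to the blocker matching args.domain.
    Returns 0 on success or 1 when the source was queried too recently.
    """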
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
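    """
    Downloads static plain-text blocklists (currently seirdy.one's
    bsl.txt) and processes every wanted domain found in them. Always
    returns 0.
    """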
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
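    """
    Scrapes the list of pact-signing instances from fedipact.online and
    registers every new, wanted domain. Returns 0 on success or 1 when
    the source was queried too recently.
    """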
1099     logger.debug("args[]='%s' - CALLED!", type(args))
1100
1101     logger.debug("Invoking locking.acquire() ...")
1102     locking.acquire()
1103
1104     source_domain = "fedipact.online"
1105     if sources.is_recent(source_domain):
1106         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1107         return 1
1108     else:
1109         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1110         sources.update(source_domain)
1111
1112     logger.info("Fetching / from source_domain='%s' ...", source_domain)
1113     response = utils.fetch_url(
1114         f"https://{source_domain}",
1115         network.web_headers,
1116         (config.get("connection_timeout"), config.get("read_timeout"))
1117     )
1118
1119     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1120     if response.ok and response.status_code == 200 and response.text != "":
1121         logger.debug("Parsing %d Bytes ...", len(response.text))
1122
1123         doc = bs4.BeautifulSoup(response.text, "html.parser")
1124         logger.debug("doc[]='%s'", type(doc))
1125
1126         rows = doc.findAll("li")
1127         logger.info("Checking %d row(s) ...", len(rows))
1128         for row in rows:
1129             logger.debug("row[]='%s'", type(row))
1130             domain = tidyup.domain(row.contents[0]) if row.contents[0] != None and row.contents[0] != "" else None
1131
1132             logger.debug("domain='%s' - AFTER!", domain)
1133             if domain is None or domain == "":
1134                 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1135                 continue
1136
1137             logger.debug("domain='%s' - BEFORE!", domain)
1138             domain = domain.encode("idna").decode("utf-8")
1139             logger.debug("domain='%s' - AFTER!", domain)
1140
1141             if not domain_helper.is_wanted(domain):
1142                 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1143                 continue
1144             elif instances.is_registered(domain):
1145                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1146                 continue
1147             elif instances.is_recent(domain):
1148                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1149                 continue
1150
1151             logger.info("Fetching domain='%s' ...", domain)
1152             federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1153
1154     logger.debug("Success! - EXIT!")
1155     return 0
1156
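# Fetches the Mobilizon instance directory (JSON API) from
# instances.joinmobilizon.org and registers any new, wanted domains.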
1157 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1158     logger.debug("args[]='%s' - CALLED!", type(args))
1159
1160     logger.debug("Invoking locking.acquire() ...")
1161     locking.acquire()
1162
1163     source_domain = "instances.joinmobilizon.org"
1164     if sources.is_recent(source_domain):
1165         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1166         return 1
1167     else:
1168         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1169         sources.update(source_domain)
1170
1171     logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1172     raw = utils.fetch_url(
1173         f"https://{source_domain}/api/v1/instances",
1174         network.web_headers,
1175         (config.get("connection_timeout"), config.get("read_timeout"))
1176     ).text
1177     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1178
1179     parsed = json.loads(raw)
1180     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1181
1182     if "data" not in parsed:
1183         logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1184         return 1
1185
1186     logger.info("Checking %d instances ...", len(parsed["data"]))
1187     for row in parsed["data"]:
1188         logger.debug("row[]='%s'", type(row))
1189         if "host" not in row:
1190             logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1191             continue
1192         elif not domain_helper.is_wanted(row["host"]):
1193             logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1194             continue
1195         elif instances.is_registered(row["host"]):
1196             logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1197             continue
1198
1199         logger.info("Fetching row[host]='%s' ...", row["host"])
1200         federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1201
1202     logger.debug("Success! - EXIT!")
1203     return 0
1204
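# Fetches instances.json from instanceapp.misskey.page and registers any new,
# wanted Misskey domains.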
1205 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1206     logger.debug("args[]='%s' - CALLED!", type(args))
1207
1208     logger.debug("Invoking locking.acquire() ...")
1209     locking.acquire()
1210
1211     source_domain = "instanceapp.misskey.page"
1212     if sources.is_recent(source_domain):
1213         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1214         return 1
1215     else:
1216         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1217         sources.update(source_domain)
1218
1219     logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1220     raw = utils.fetch_url(
1221         f"https://{source_domain}/instances.json",
1222         network.web_headers,
1223         (config.get("connection_timeout"), config.get("read_timeout"))
1224     ).text
1225     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1226
1227     parsed = json.loads(raw)
1228     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1229
1230     if "instancesInfos" not in parsed:
1231         logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1232         return 1
1233
1234     logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1235     for row in parsed["instancesInfos"]:
1236         logger.debug("row[%s]='%s'", type(row), row)
1237         if "url" not in row:
1238             logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1239             continue
1240         elif not domain_helper.is_wanted(row["url"]):
1241             logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1242             continue
1243         elif instances.is_registered(row["url"]):
1244             logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1245             continue
1246
1247         logger.info("Fetching row[url]='%s' ...", row["url"])
1248         federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1249
1250     logger.debug("Success! - EXIT!")
1251     return 0
1252
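# Re-fetches block lists from instances flagged with has_obfuscation = 1 and
# tries to deobfuscate wildcard/hashed entries; successfully resolved
# "reject" blocks may additionally be announced through the bot.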
1253 def recheck_obfuscation(args: argparse.Namespace) -> int:
1254     logger.debug("args[]='%s' - CALLED!", type(args))
1255
1256     logger.debug("Invoking locking.acquire() ...")
1257     locking.acquire()
1258
1259     if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1260         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1261     elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1262         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1263     else:
1264         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1265
1266     rows = database.cursor.fetchall()
1267     logger.info("Checking %d domains ...", len(rows))
1268     for row in rows:
1269         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1270         if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1271             logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1272             continue
1273
1274         logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1275         blocking = federation.fetch_blocks(row["domain"])
1276
1277         logger.debug("blocking()=%d", len(blocking))
1278         if len(blocking) == 0:
1279             if row["software"] == "pleroma":
1280                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1281                 blocking = pleroma.fetch_blocks(row["domain"])
1282             elif row["software"] == "mastodon":
1283                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1284                 blocking = mastodon.fetch_blocks(row["domain"])
1285             elif row["software"] == "lemmy":
1286                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1287                 blocking = lemmy.fetch_blocks(row["domain"])
1288             elif row["software"] == "friendica":
1289                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1290                 blocking = friendica.fetch_blocks(row["domain"])
1291             elif row["software"] == "misskey":
1292                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1293                 blocking = misskey.fetch_blocks(row["domain"])
1294             else:
1295                 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1296
1297         # c.s isn't part of oliphant's "hidden" blocklists
1298         logger.debug("row[domain]='%s'", row["domain"])
1299         if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1300             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1301             instances.set_last_blocked(row["domain"])
1302             instances.set_total_blocks(row["domain"], blocking)
1303
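        # Count obfuscated entries and collect deobfuscated "reject" blocks
        # for the optional bot announcement further below.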
1304         obfuscated = 0
1305         blockdict = list()
1306
1307         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1308         for block in blocking:
1309             logger.debug("block[blocked]='%s'", block["blocked"])
1310             blocked = None
1311
1312             if block["blocked"] == "":
1313                 logger.debug("block[blocked] is empty - SKIPPED!")
1314                 continue
1315             elif block["blocked"].endswith(".arpa"):
1316                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1317                 continue
1318             elif block["blocked"].endswith(".tld"):
1319                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1320                 continue
1321             elif block["blocked"].endswith(".onion"):
1322                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1323                 continue
1324             elif "*" in block["blocked"] or "?" in block["blocked"]:
1325                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1326                 obfuscated = obfuscated + 1
1327                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1328             elif not domain_helper.is_wanted(block["blocked"]):
1329                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1330                 continue
1331             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1332                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1333                 continue
1334
1335             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1336             if blocked is not None and blocked != block["blocked"]:
1337                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1338                 obfuscated = obfuscated - 1
1339
1340                 if blacklist.is_blacklisted(blocked):
1341                     logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1342                     continue
1343                 elif blacklist.is_blacklisted(row["domain"]):
1344                     logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1345                     continue
1346                 elif blocks.is_instance_blocked(row["domain"], blocked):
1347                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1348                     continue
1349
1350                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1351
1352                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1353                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1354                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
1355                     blockdict.append({
1356                         "blocked": blocked,
1357                         "reason" : block["reason"],
1358                     })
1359
1360         logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1361         instances.set_obfuscated_blocks(row["domain"], obfuscated)
1362
1363         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1364         if instances.has_pending(row["domain"]):
1365             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1366             instances.update(row["domain"])
1367
1368         logger.debug("Invoking commit() ...")
1369         database.connection.commit()
1370
1371         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1372         if config.get("bot_enabled") and len(blockdict) > 0:
1373             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1374             network.send_bot_post(row["domain"], blockdict)
1375
1376     logger.debug("Success! - EXIT!")
1377     return 0
1378
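# Downloads a CSV of instances from demo.fedilist.com (optionally filtered by
# software through args.software) and registers any new, wanted domains.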
1379 def fetch_fedilist(args: argparse.Namespace) -> int:
1380     logger.debug("args[]='%s' - CALLED!", type(args))
1381
1382     logger.debug("Invoking locking.acquire() ...")
1383     locking.acquire()
1384
1385     source_domain = "demo.fedilist.com"
1386     if sources.is_recent(source_domain):
1387         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1388         return 1
1389     else:
1390         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1391         sources.update(source_domain)
1392
1393     url = f"http://{source_domain}/instance/csv?onion=not"
1394     if args.software is not None and args.software != "":
1395         logger.debug("args.software='%s'", args.software)
1396         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1397
1398     logger.info("Fetching url='%s' ...", url)
1399     response = reqto.get(
1400         url,
1401         headers=network.web_headers,
1402         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1403         allow_redirects=False
1404     )
1405
1406     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1407     if not response.ok or response.status_code > 200 or len(response.content) == 0:
1408         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1409         return 1
1410
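    # Parse the CSV payload; fedilist serves rows with a "hostname" column,
    # which is validated and normalized below.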
1411     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1412
1413     logger.debug("reader[]='%s'", type(reader))
1414     if reader is None:
1415         logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1416         return 2
1417
1418     rows = list(reader)
1419
1420     logger.info("Checking %d rows ...", len(rows))
1421     for row in rows:
1422         logger.debug("row[]='%s'", type(row))
1423         if "hostname" not in row:
1424             logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1425             continue
1426
1427         logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1428         domain = tidyup.domain(row["hostname"]) if row["hostname"] is not None and row["hostname"] != "" else None
1429         logger.debug("domain='%s' - AFTER!", domain)
1430
1431         if domain is None or domain == "":
1432             logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1433             continue
1434
1435         logger.debug("domain='%s' - BEFORE!", domain)
1436         domain = domain.encode("idna").decode("utf-8")
1437         logger.debug("domain='%s' - AFTER!", domain)
1438
1439         if not domain_helper.is_wanted(domain):
1440             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1441             continue
1442         elif (args.force is None or not args.force) and instances.is_registered(domain):
1443             logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s' - SKIPPED!", domain, type(args.force))
1444             continue
1445         elif instances.is_recent(domain):
1446             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1447             continue
1448
1449         logger.info("Fetching instances from domain='%s' ...", domain)
1450         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1451
1452     logger.debug("Success! - EXIT!")
1453     return 0
1454
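# Re-runs software detection for stored instances, selected by domain,
# software, detection mode or a stale/missing last_nodeinfo timestamp.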
1455 def update_nodeinfo(args: argparse.Namespace) -> int:
1456     logger.debug("args[]='%s' - CALLED!", type(args))
1457
1458     logger.debug("Invoking locking.acquire() ...")
1459     locking.acquire()
1460
1461     if args.domain is not None and args.domain != "":
1462         logger.debug("Fetching args.domain='%s'", args.domain)
1463         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
1464     elif args.software is not None and args.software != "":
1465         logger.info("Fetching domains for args.software='%s'", args.software)
1466         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.software.lower(), time.time() - config.get("recheck_nodeinfo")])
1467     elif args.mode is not None and args.mode != "":
1468         logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1469         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [args.mode.upper(), time.time() - config.get("recheck_nodeinfo")])
1470     elif args.no_software:
1471         logger.info("Fetching domains with no software type detected ...")
1472         database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
1473     else:
1474         logger.info("Fetching domains for recently updated ...")
1475         database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
1476
1477     domains = database.cursor.fetchall()
1478
1479     logger.info("Checking %d domain(s) ...", len(domains))
1480     cnt = 0
1481     for row in domains:
1482         logger.debug("row[]='%s'", type(row))
1483         if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1484             logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1485             continue
1486
1487         try:
1488             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1489             software = federation.determine_software(row["domain"])
1490
1491             logger.debug("Determined software='%s'", software)
1492             if (software != row["software"] and software is not None) or args.force is True:
1493                 logger.debug("software='%s'", software)
1494                 if software is None:
1495                     logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1496                     instances.set_nodeinfo_url(row["domain"], None)
1497
1498                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1499                 instances.set_software(row["domain"], software)
1500
1501             if software is not None:
1502                 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1503                 instances.set_success(row["domain"])
1504         except network.exceptions as exception:
1505             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1506             instances.set_last_error(row["domain"], exception)
1507
1508         instances.set_last_nodeinfo(row["domain"])
1509         instances.update(row["domain"])
1510         cnt = cnt + 1
1511
1512     logger.debug("Success! - EXIT!")
1513     return 0
1514
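# Queries the instances.social list API (requires instances_social_api_key to
# be set in config.json) and registers any new, wanted domains.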
1515 def fetch_instances_social(args: argparse.Namespace) -> int:
1516     logger.debug("args[]='%s' - CALLED!", type(args))
1517
1518     logger.debug("Invoking locking.acquire() ...")
1519     locking.acquire()
1520
1521     source_domain = "instances.social"
1522
1523     if config.get("instances_social_api_key") == "":
1524         logger.error("API key not set. Please set in your config.json file.")
1525         return 1
1526     elif sources.is_recent(source_domain):
1527         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1528         return 2
1529     else:
1530         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1531         sources.update(source_domain)
1532
1533     headers = {
1534         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1535     }
1536
1537     logger.info("Fetching list from source_domain='%s' ...", source_domain)
1538     fetched = network.get_json_api(
1539         source_domain,
1540         "/api/1.0/instances/list?count=0&sort_by=name",
1541         headers,
1542         (config.get("connection_timeout"), config.get("read_timeout"))
1543     )
1544     logger.debug("fetched[]='%s'", type(fetched))
1545
1546     if "error_message" in fetched:
1547         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1548         return 2
1549     elif "exception" in fetched:
1550         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1551         return 3
1552     elif "json" not in fetched:
1553         logger.warning("fetched has no element 'json' - EXIT!")
1554         return 4
1555     elif "instances" not in fetched["json"]:
1556         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1557         return 5
1558
1559     domains = list()
1560     rows = fetched["json"]["instances"]
1561
1562     logger.info("Checking %d row(s) ...", len(rows))
1563     for row in rows:
1564         logger.debug("row[]='%s'", type(row))
1565         domain = tidyup.domain(row["name"]) if row["name"] is not None and row["name"] != "" else None
1566         logger.debug("domain='%s' - AFTER!", domain)
1567
1568         if domain is None or domain == "":
1569             logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1570             continue
1571
1572         logger.debug("domain='%s' - BEFORE!", domain)
1573         domain = domain.encode("idna").decode("utf-8")
1574         logger.debug("domain='%s' - AFTER!", domain)
1575
1576         if not domain_helper.is_wanted(domain):
1577             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1578             continue
1579         elif domain in domains:
1580             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1581             continue
1582         elif instances.is_registered(domain):
1583             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1584             continue
1585         elif instances.is_recent(domain):
1586             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1587             continue
1588
1589         logger.info("Fetching instances from domain='%s' ...", domain)
1590         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1591
1592     logger.debug("Success! - EXIT!")
1593     return 0
1594
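# Crawls known relay software (activityrelay, aoderelay, selective-relay and
# pub-relay) for their peer lists, updates each relay's peer count and then
# registers newly found peer domains.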
1595 def fetch_relays(args: argparse.Namespace) -> int:
1596     logger.debug("args[]='%s' - CALLED!", type(args))
1597
1598     logger.debug("Invoking locking.acquire() ...")
1599     locking.acquire()
1600
1601     if args.domain is not None and args.domain != "":
1602         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1603     elif args.software is not None and args.software != "":
1604         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
1605     else:
1606         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
1607
1608     domains = list()
1609     rows = database.cursor.fetchall()
1610
1611     logger.info("Checking %d relays ...", len(rows))
1612     for row in rows:
1613         logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1614         peers = list()
1615         if not args.force and instances.is_recent(row["domain"]):
1616             logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1617             continue
1618
1619         try:
1620             if row["software"] == "pub-relay":
1621                 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1622                 raw = network.fetch_api_url(
1623                     row["nodeinfo_url"],
1624                     (config.get("connection_timeout"), config.get("read_timeout"))
1625                 )
1626
1627                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1628                 if "exception" in raw:
1629                     logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1630                     raise raw["exception"]
1631                 elif "error_message" in raw:
1632                     logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1633                     instances.set_last_error(row["domain"], raw)
1634                     instances.set_last_instance_fetch(row["domain"])
1635                     instances.update(row["domain"])
1636                     continue
1637                 elif "json" not in raw:
1638                     logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1639                     continue
1640                 elif "metadata" not in raw["json"]:
1641                     logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
1642                     continue
1643                 elif "peers" not in raw["json"]["metadata"]:
1644                     logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
1645                     continue
1646             else:
1647                 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1648                 raw = utils.fetch_url(
1649                     f"https://{row['domain']}",
1650                     network.web_headers,
1651                     (config.get("connection_timeout"), config.get("read_timeout"))
1652                 ).text
1653                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1654
1655                 doc = bs4.BeautifulSoup(raw, features="html.parser")
1656                 logger.debug("doc[]='%s'", type(doc))
1657
1658         except network.exceptions as exception:
1659             logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1660             instances.set_last_error(row["domain"], exception)
1661             instances.set_last_instance_fetch(row["domain"])
1662             instances.update(row["domain"])
1663             continue
1664
1665         logger.debug("row[software]='%s'", row["software"])
1666         if row["software"] == "activityrelay":
1667             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1668             tags = doc.findAll("p")
1669
1670             logger.debug("Checking %d paragraphs ...", len(tags))
1671             for tag in tags:
1672                 logger.debug("tag[]='%s'", type(tag))
1673                 if len(tag.contents) == 0:
1674                     logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1675                     continue
1676                 elif "registered instances" not in tag.contents[0]:
1677                     logger.debug("Skipping paragraph, text not found.")
1678                     continue
1679
1680                 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1681                 for domain in tag.contents:
1682                     logger.debug("domain[%s]='%s'", type(domain), domain)
1683                     if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1684                         continue
1685
1686                     domain = str(domain)
1687                     logger.debug("domain='%s'", domain)
1688                     if not domain_helper.is_wanted(domain):
1689                         logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1690                         continue
1691
1692                     logger.debug("domain='%s' - BEFORE!", domain)
1693                     domain = tidyup.domain(domain) if domain is not None and domain != "" else None
1694                     logger.debug("domain='%s' - AFTER!", domain)
1695
1696                     if domain is None or domain == "":
1697                         logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1698                         continue
1699                     elif domain not in peers:
1700                         logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1701                         peers.append(domain)
1702
1703                     if dict_helper.has_key(domains, "domain", domain):
1704                         logger.debug("domain='%s' already added", domain)
1705                         continue
1706
1707                     logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1708                     domains.append({
1709                         "domain": domain,
1710                         "origin": row["domain"],
1711                     })
1712         elif row["software"] in ["aoderelay", "selective-relay"]:
1713             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1714             if row["software"] == "aoderelay":
1715                 tags = doc.findAll("section", {"class": "instance"})
1716             else:
1717                 tags = doc.find("div", {"id": "instances"}).findAll("li")
1718
1719             logger.debug("Checking %d tags ...", len(tags))
1720             for tag in tags:
1721                 logger.debug("tag[]='%s'", type(tag))
1722
1723                 link = tag.find("a")
1724                 logger.debug("link[%s]='%s'", type(link), link)
1725                 if not isinstance(link, bs4.element.Tag):
1726                     logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1727                     continue
1728
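                # Extract the host part from the link target, dropping any
                # port suffix.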
1729                 components = urlparse(link.get("href"))
1730                 logger.debug("components(%d)='%s'", len(components), components)
1731                 domain = components.netloc.lower().split(":")[0]
1732
1733                 logger.debug("domain='%s' - BEFORE!", domain)
1734                 domain = tidyup.domain(domain) if domain is not None and domain != "" else None
1735                 logger.debug("domain='%s' - AFTER!", domain)
1736
1737                 if domain is None or domain == "":
1738                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1739                     continue
1740                 elif domain not in peers:
1741                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1742                     peers.append(domain)
1743
1744                 if dict_helper.has_key(domains, "domain", domain):
1745                     logger.debug("domain='%s' already added", domain)
1746                     continue
1747
1748                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1749                 domains.append({
1750                     "domain": domain,
1751                     "origin": row["domain"],
1752                 })
1753         elif row["software"] == "pub-relay":
1754             logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1755             for domain in raw["json"]["metadata"]["peers"]:
1756                 logger.debug("domain='%s' - BEFORE!", domain)
1757                 domain = tidyup.domain(domain) if domain is not None and domain != "" else None
1758                 logger.debug("domain='%s' - AFTER!", domain)
1759
1760                 if domain is None or domain == "":
1761                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1762                     continue
1763                 elif domain not in peers:
1764                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1765                     peers.append(domain)
1766
1767                 if dict_helper.has_key(domains, "domain", domain):
1768                     logger.debug("domain='%s' already added", domain)
1769                     continue
1770
1771                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1772                 domains.append({
1773                     "domain": domain,
1774                     "origin": row["domain"],
1775                 })
1776         else:
1777             logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1778             continue
1779
1780         logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1781         instances.set_last_instance_fetch(row["domain"])
1782
1783         logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1784         instances.set_total_peers(row["domain"], peers)
1785
1786         logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1787         instances.update(row["domain"])
1788
1789     logger.info("Checking %d domains ...", len(domains))
1790     for row in domains:
1791         logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1792         if not domain_helper.is_wanted(row["domain"]):
1793             logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1794             continue
1795         elif instances.is_registered(row["domain"]):
1796             logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1797             continue
1798
1799         logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1800         federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1801
1802     logger.debug("Success! - EXIT!")
1803     return 0
1804
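# Selects rows whose domain columns do not yet contain punycode ("xn--") and
# hands them to translate_idnas() for IDNA conversion.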
1805 def convert_idna(args: argparse.Namespace) -> int:
1806     logger.debug("args[]='%s' - CALLED!", type(args))
1807
1808     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1809     rows = database.cursor.fetchall()
1810
1811     logger.debug("rows[]='%s'", type(rows))
1812     instances.translate_idnas(rows, "domain")
1813
1814     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1815     rows = database.cursor.fetchall()
1816
1817     logger.debug("rows[]='%s'", type(rows))
1818     instances.translate_idnas(rows, "origin")
1819
1820     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1821     rows = database.cursor.fetchall()
1822
1823     logger.debug("rows[]='%s'", type(rows))
1824     blocks.translate_idnas(rows, "blocker")
1825
1826     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1827     rows = database.cursor.fetchall()
1828
1829     logger.debug("rows[]='%s'", type(rows))
1830     blocks.translate_idnas(rows, "blocked")
1831
1832     logger.debug("Success! - EXIT!")
1833     return 0
1834
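# Deletes instances (and their block entries) whose domain no longer
# validates, then VACUUMs the database.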
1835 def remove_invalid(args: argparse.Namespace) -> int:
1836     logger.debug("args[]='%s' - CALLED!", type(args))
1837
1838     logger.debug("Invoking locking.acquire() ...")
1839     locking.acquire()
1840
1841     database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1842     rows = database.cursor.fetchall()
1843
1844     logger.info("Checking %d domains ...", len(rows))
1845     for row in rows:
1846         logger.debug("row[domain]='%s'", row["domain"])
1847         if not validators.domain(row["domain"].split("/")[0]):
1848             logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
1849             database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1850             database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
1851
1852     logger.debug("Invoking commit() ...")
1853     database.connection.commit()
1854
1855     logger.info("Vacuum cleaning database ...")
1856     database.cursor.execute("VACUUM")
1857
1858     logger.debug("Success! - EXIT!")
1859     return 0