fba/commands.py
# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

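# Sanity-checks a single domain supplied via --domain: returns 0 when the
# domain is still unknown, 100 when it is syntactically invalid, 101 when it
# is blacklisted and 102 when it is already registered.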
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

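# Consistency check: verifies that each stored nodeinfo_url actually belongs
# to its instance (either a relative path or a URL containing the domain or
# its punycode form) and reports the number of mismatching rows.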
def check_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

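# Fetches the public server list from pixelfed.org's API and registers every
# new, wanted domain by crawling it via federation.fetch_instances().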
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, so network.source_headers does not need to be added here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] in [None, ""]:
                logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

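# Queries the GraphQL API at gql.api.bka.li for its list of known domains and
# feeds every new, wanted domain into the crawler.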
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error_message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows[data]()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] in [None, ""]:
                logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

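# Fetches and parses blocklists from registered instances. Supports narrowing
# to a single --domain or --software, or to instances without any known blocks
# (--only-none). Obfuscated entries (containing "*" or "?") are deobfuscated
# against already-known domains where possible; new blocks are stored and may
# be announced through the bot account when bot_enabled is set.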
def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./fba.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.only_none:
        # Check only entries where total_blocks is still NULL
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND total_blocks IS NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue
        elif not args.force and instances.is_recent(blocker, "last_blocked"):
            logger.debug("blocker='%s' has been recently accessed - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # chaos.social isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant or fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] in [None, ""]:
                logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") != "true":
                logger.debug("blocked='%s' is an I2P domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some servers obscure domains with question marks; it is unclear whether this depends on the software version
                row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] in [None, ""]:
                logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
        instances.set_obfuscated_blocks(blocker, obfuscated)

        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

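# Scrapes fediverse.observer: determines the list of software types from the
# site's navigation bar (unless --software is given), then fetches the table
# data per software type and registers every new, wanted domain.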
def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s'", software)

        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            logger.debug("domain[%s]='%s'", type(domain), domain)
            domain = tidyup.domain(domain) if domain not in [None, ""] else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain in [None, ""]:
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

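# Scrapes the silenced/limited and suspended server lists from wiki.todon.eu
# and stores them as blocks of todon.eu.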
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

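# Fetches chaos.social's federation.md from raw.githubusercontent.com, renders
# the Markdown, extracts the "silenced" and "blocked" instance tables and
# stores them as blocks of chaos.social.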
def fetch_cs(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], row["reason"], blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

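# Parses an FBA-specific RSS feed given via --feed and registers every new,
# wanted domain found in the feed items.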
def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain in [None, ""]:
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

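# Parses the ATOM feed of an FBA bot account (ryona.agency by default, or the
# URL given via --feed) and registers every new, wanted domain linked from the
# feed entries.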
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            elements = doc.findAll("a")

            logger.debug("Checking %d element(s) ...", len(elements))
            for element in elements:
                logger.debug("element[%s]='%s'", type(element), element)
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href not in [None, ""] else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain in [None, ""]:
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

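# Crawls instances starting from --domain. Unless --single is given, it then
# loops over already-registered instances of supported software types whose
# peer lists are due for a re-fetch.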
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software is None:
        logger.warning("args.domain='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force to get it updated.", args.domain, args.domain)
        return 102
    elif software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 103

    # Initial fetch
    try:
        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 104

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s' ...", domain, row["origin"], row["software"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

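# Processes all configured CSV-based blocklists (blocklists.csv_files),
# optionally narrowed to a single blocker via --domain.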
def fetch_csv(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is a domain given that doesn't match this blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0

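# Downloads oliphant's blocklists from codeberg.org and processes each CSV,
# optionally narrowed to a single blocker via --domain.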
def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given that doesn't match this blocker?
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

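# Fetches plain-text blocklists (blocklists.txt_files), one domain per line,
# and processes every new, wanted domain.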
def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
    for row in blocklists.txt_files:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None
                logger.debug("domain='%s' - AFTER!", domain)

                if domain in [None, ""]:
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif not args.force and instances.is_registered(domain):
                    logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s' ...", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name, force=args.force)
                logger.debug("processed='%s'", processed)

    logger.debug("Success! - EXIT!")
    return 0

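# Scrapes the list of pledged instances from fedipact.online and registers
# every new, wanted domain.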
def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain in [None, ""]:
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

1161 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
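    """
    Fetches the instance list from instances.joinmobilizon.org and fetches
    instance data for every wanted, not yet registered 'host' entry.
    Returns 0 on success, 1 on a too recent poll or a malformed response.
    """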
1162     logger.debug("args[]='%s' - CALLED!", type(args))
1163
1164     logger.debug("Invoking locking.acquire() ...")
1165     locking.acquire()
1166
1167     source_domain = "instances.joinmobilizon.org"
1168     if sources.is_recent(source_domain):
1169         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1170         return 1
1171     else:
1172         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1173         sources.update(source_domain)
1174
1175     logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1176     raw = utils.fetch_url(
1177         f"https://{source_domain}/api/v1/instances",
1178         network.web_headers,
1179         (config.get("connection_timeout"), config.get("read_timeout"))
1180     ).text
1181     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1182
1183     parsed = json.loads(raw)
1184     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1185
1186     if "data" not in parsed:
1187         logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1188         return 1
1189
1190     logger.info("Checking %d instances ...", len(parsed["data"]))
1191     for row in parsed["data"]:
1192         logger.debug("row[]='%s'", type(row))
1193         if "host" not in row:
1194             logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1195             continue
1196         elif not domain_helper.is_wanted(row["host"]):
1197             logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1198             continue
1199         elif instances.is_registered(row["host"]):
1200             logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1201             continue
1202
1203         logger.info("Fetching row[host]='%s' ...", row["host"])
1204         federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1205
1206     logger.debug("Success! - EXIT!")
1207     return 0
1208
1209 def fetch_joinmisskey(args: argparse.Namespace) -> int:
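    """
    Fetches instances.json from instanceapp.misskey.page and fetches instance
    data for every wanted, not yet registered 'url' entry found in
    'instancesInfos'.
    Returns 0 on success, 1 on a too recent poll or a malformed response.
    """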
1210     logger.debug("args[]='%s' - CALLED!", type(args))
1211
1212     logger.debug("Invoking locking.acquire() ...")
1213     locking.acquire()
1214
1215     source_domain = "instanceapp.misskey.page"
1216     if sources.is_recent(source_domain):
1217         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1218         return 1
1219     else:
1220         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1221         sources.update(source_domain)
1222
1223     logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1224     raw = utils.fetch_url(
1225         f"https://{source_domain}/instances.json",
1226         network.web_headers,
1227         (config.get("connection_timeout"), config.get("read_timeout"))
1228     ).text
1229     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1230
1231     parsed = json.loads(raw)
1232     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1233
1234     if "instancesInfos" not in parsed:
1235         logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1236         return 1
1237
1238     logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1239     for row in parsed["instancesInfos"]:
1240         logger.debug("row[%s]='%s'", type(row), row)
1241         if "url" not in row:
1242             logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1243             continue
1244         elif not domain_helper.is_wanted(row["url"]):
1245             logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1246             continue
1247         elif instances.is_registered(row["url"]):
1248             logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1249             continue
1250
1251         logger.info("Fetching row[url]='%s' ...", row["url"])
1252         federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1253
1254     logger.debug("Success! - EXIT!")
1255     return 0
1256
1257 def recheck_obfuscation(args: argparse.Namespace) -> int:
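    """
    Re-fetches block lists from instances that have (or may have) block
    obfuscation enabled, optionally limited by args.domain or args.software.
    Entries containing '*' or '?' are passed through utils.deobfuscate();
    resolved entries are stored as regular blocks and the number of
    still-obfuscated entries is recorded per instance.
    Returns 0 on success.
    """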
1258     logger.debug("args[]='%s' - CALLED!", type(args))
1259
1260     logger.debug("Invoking locking.acquire() ...")
1261     locking.acquire()
1262
1263     if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1264         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain])
1265     elif isinstance(args.software, str) and args.software != "":
1266         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software])
1267     else:
1268         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL")
1269
1270     rows = database.cursor.fetchall()
1271     logger.info("Checking %d domains ...", len(rows))
1272     for row in rows:
1273         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1274         if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1275             logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1276             continue
1277         elif blacklist.is_blacklisted(row["domain"]):
1278             logger.warning("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1279             continue
1280
1281         logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1282         blocking = federation.fetch_blocks(row["domain"])
1283
1284         logger.debug("blocking()=%d", len(blocking))
1285         if len(blocking) == 0:
1286             logger.debug("Empty blocking list, trying individual fetch_blocks() for row[software]='%s' ...", row["software"])
1287             if row["software"] == "pleroma":
1288                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1289                 blocking = pleroma.fetch_blocks(row["domain"])
1290             elif row["software"] == "mastodon":
1291                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1292                 blocking = mastodon.fetch_blocks(row["domain"])
1293             elif row["software"] == "lemmy":
1294                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1295                 blocking = lemmy.fetch_blocks(row["domain"])
1296             elif row["software"] == "friendica":
1297                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1298                 blocking = friendica.fetch_blocks(row["domain"])
1299             elif row["software"] == "misskey":
1300                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1301                 blocking = misskey.fetch_blocks(row["domain"])
1302             else:
1303                 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1304
1305         # chaos.social isn't part of oliphant's "hidden" blocklists
1306         logger.debug("row[domain]='%s'", row["domain"])
1307         if row["domain"] != "chaos.social" and row["software"] is not None and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1308             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1309             instances.set_last_blocked(row["domain"])
1310             instances.set_total_blocks(row["domain"], blocking)
1311
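        # Count entries that remain obfuscated and collect successfully
        # deobfuscated "reject" blocks for an optional bot announcement.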
1312         obfuscated = 0
1313         blockdict = list()
1314
1315         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1316         for block in blocking:
1317             logger.debug("block[blocked]='%s'", block["blocked"])
1318             blocked = None
1319
1320             if block["blocked"] == "":
1321                 logger.debug("block[blocked] is empty - SKIPPED!")
1322                 continue
1323             elif block["blocked"].endswith(".onion"):
1324                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1325                 continue
1326                 elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") != "true":
1327                     logger.debug("blocked='%s' is an I2P domain name - SKIPPED!", block["blocked"])
1328                 continue
1329             elif block["blocked"].endswith(".arpa"):
1330                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1331                 continue
1332             elif block["blocked"].endswith(".tld"):
1333                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1334                 continue
1335             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1336                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1337                 obfuscated = obfuscated + 1
1338                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
1339             elif not domain_helper.is_wanted(block["blocked"]):
1340                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1341                 continue
1342             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1343                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1344                 continue
1345
1346             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1347             if blocked is not None and blocked != block["blocked"]:
1348                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1349                 obfuscated = obfuscated - 1
1350
1351                 if blacklist.is_blacklisted(blocked):
1352                     logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1353                     continue
1354                 elif blacklist.is_blacklisted(row["domain"]):
1355                     logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1356                     continue
1357                 elif blocks.is_instance_blocked(row["domain"], blocked):
1358                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1359                     continue
1360
1361                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1362
1363                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1364                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1365                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
1366                     blockdict.append({
1367                         "blocked": blocked,
1368                         "reason" : block["reason"],
1369                     })
1370
1371         logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1372         instances.set_has_obfuscation(row["domain"], (obfuscated > 0))
1373         instances.set_obfuscated_blocks(row["domain"], obfuscated)
1374
1375         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1376         if instances.has_pending(row["domain"]):
1377             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1378             instances.update(row["domain"])
1379
1380         logger.debug("Invoking commit() ...")
1381         database.connection.commit()
1382
1383         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1384         if config.get("bot_enabled") and len(blockdict) > 0:
1385             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1386             network.send_bot_post(row["domain"], blockdict)
1387
1388     logger.debug("Success! - EXIT!")
1389     return 0
1390
1391 def fetch_fedilist(args: argparse.Namespace) -> int:
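    """
    Downloads a CSV instance list from demo.fedilist.com, optionally filtered
    by args.software, and fetches instance data for every wanted 'hostname'
    entry that is neither registered (unless args.force is set) nor recently
    crawled.
    Returns 0 on success, 1 on a failed download, 2 on a CSV parser error.
    """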
1392     logger.debug("args[]='%s' - CALLED!", type(args))
1393
1394     logger.debug("Invoking locking.acquire() ...")
1395     locking.acquire()
1396
1397     source_domain = "demo.fedilist.com"
1398     if sources.is_recent(source_domain):
1399         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1400         return 1
1401     else:
1402         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1403         sources.update(source_domain)
1404
1405     url = f"http://{source_domain}/instance/csv?onion=not"
1406     if args.software is not None and args.software != "":
1407         logger.debug("args.software='%s'", args.software)
1408         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1409
1410     logger.info("Fetching url='%s' ...", url)
1411     response = reqto.get(
1412         url,
1413         headers=network.web_headers,
1414         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1415         allow_redirects=False
1416     )
1417
1418     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1419     if not response.ok or response.status_code > 200 or len(response.content) == 0:
1420         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1421         return 1
1422
1423     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1424
1425     logger.debug("reader[]='%s'", type(reader))
1426     if reader is None:
1427         logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1428         return 2
1429
1430     rows = list(reader)
1431
1432     logger.info("Checking %d rows ...", len(rows))
1433     for row in rows:
1434         logger.debug("row[]='%s'", type(row))
1435         if "hostname" not in row:
1436             logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1437             continue
1438
1439         logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1440         domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
1441         logger.debug("domain='%s' - AFTER!", domain)
1442
1443         if domain in [None, ""]:
1444             logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1445             continue
1446
1447         logger.debug("domain='%s' - BEFORE!", domain)
1448         domain = domain.encode("idna").decode("utf-8")
1449         logger.debug("domain='%s' - AFTER!", domain)
1450
1451         if not domain_helper.is_wanted(domain):
1452             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1453             continue
1454         elif (args.force is None or not args.force) and instances.is_registered(domain):
1455             logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1456             continue
1457         elif instances.is_recent(domain):
1458             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1459             continue
1460
1461         logger.info("Fetching instances from domain='%s' ...", domain)
1462         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1463
1464     logger.debug("Success! - EXIT!")
1465     return 0
1466
1467 def update_nodeinfo(args: argparse.Namespace) -> int:
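    """
    Re-runs software detection for instances selected by args.domain,
    args.software, args.mode or the various args.no_*/args.with_* filters,
    defaulting to all instances, least recently updated first. A changed
    detection result is written back via instances.set_software().
    Returns 0 on success.
    """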
1468     logger.debug("args[]='%s' - CALLED!", type(args))
1469
1470     logger.debug("Invoking locking.acquire() ...")
1471     locking.acquire()
1472
1473     if args.domain is not None and args.domain != "":
1474         logger.debug("Fetching args.domain='%s'", args.domain)
1475         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1476     elif args.software is not None and args.software != "":
1477         logger.info("Fetching domains for args.software='%s'", args.software)
1478         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1479     elif args.mode is not None and args.mode != "":
1480         logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1481         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
1482     elif args.no_software:
1483         logger.info("Fetching domains with no software type detected ...")
1484         database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1485     elif args.with_software:
1486         logger.info("Fetching domains with any software type detected ...")
1487         database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NOT NULL ORDER BY last_updated ASC")
1488     elif args.no_auto:
1489         logger.info("Fetching domains with a detection mode other than AUTO_DISCOVERY ...")
1490         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1491     elif args.no_detection:
1492         logger.info("Fetching domains with no detection mode being set ...")
1493         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1494     else:
1495         logger.info("Fetching all domains, least recently updated first ...")
1496         database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1497
1498     domains = database.cursor.fetchall()
1499
1500     logger.info("Checking %d domain(s) ...", len(domains))
1501     cnt = 0
1502     for row in domains:
1503         logger.debug("row[]='%s'", type(row))
1504         if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1505             logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1506             continue
1507         elif blacklist.is_blacklisted(row["domain"]):
1508             logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1509             continue
1510
1511         try:
1512             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1513             software = federation.determine_software(row["domain"])
1514
1515             logger.debug("Determined software='%s'", software)
1516             if (software != row["software"] and software is not None) or args.force is True:
1517                 logger.debug("software='%s'", software)
1518                 if software is None:
1519                     logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1520                     instances.set_nodeinfo_url(row["domain"], None)
1521
1522                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1523                 instances.set_software(row["domain"], software)
1524
1525             if software is not None:
1526                 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1527                 instances.set_success(row["domain"])
1528         except network.exceptions as exception:
1529             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1530             instances.set_last_error(row["domain"], exception)
1531
1532         instances.set_last_nodeinfo(row["domain"])
1533         instances.update(row["domain"])
1534         cnt = cnt + 1
1535
1536     logger.debug("Success! - EXIT!")
1537     return 0
1538
1539 def fetch_instances_social(args: argparse.Namespace) -> int:
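    """
    Queries the instances.social list API using the configured
    'instances_social_api_key' and fetches instance data for every wanted,
    not yet registered and not recently crawled domain.
    Returns 0 on success, non-zero on a missing API key, a too recent poll
    or an unexpected API response.
    """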
1540     logger.debug("args[]='%s' - CALLED!", type(args))
1541
1542     logger.debug("Invoking locking.acquire() ...")
1543     locking.acquire()
1544
1545     source_domain = "instances.social"
1546
1547     if config.get("instances_social_api_key") == "":
1548         logger.error("API key not set. Please set 'instances_social_api_key' in your config.json file.")
1549         return 1
1550     elif sources.is_recent(source_domain):
1551         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1552         return 2
1553     else:
1554         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1555         sources.update(source_domain)
1556
1557     headers = {
1558         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1559     }
1560
1561     logger.info("Fetching list from source_domain='%s' ...", source_domain)
1562     fetched = network.get_json_api(
1563         source_domain,
1564         "/api/1.0/instances/list?count=0&sort_by=name",
1565         headers=headers,
1566         timeout=(config.get("connection_timeout"), config.get("read_timeout"))
1567     )
1568     logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
1569
1570     if "error_message" in fetched:
1571         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1572         return 2
1573     elif "exception" in fetched:
1574         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1575         return 3
1576     elif "json" not in fetched:
1577         logger.warning("fetched has no element 'json' - EXIT!")
1578         return 4
1579     elif "instances" not in fetched["json"]:
1580         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1581         return 5
1582
1583     domains = list()
1584     rows = fetched["json"]["instances"]
1585
1586     logger.info("Checking %d row(s) ...", len(rows))
1587     for row in rows:
1588         logger.debug("row[]='%s'", type(row))
1589         domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
1590         logger.debug("domain='%s' - AFTER!", domain)
1591
1592         if domain in [None, ""]:
1593             logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1594             continue
1595
1596         logger.debug("domain='%s' - BEFORE!", domain)
1597         domain = domain.encode("idna").decode("utf-8")
1598         logger.debug("domain='%s' - AFTER!", domain)
1599
1600         if not domain_helper.is_wanted(domain):
1601             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1602             continue
1603         elif domain in domains:
1604             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1605             continue
1606         elif instances.is_registered(domain):
1607             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1608             continue
1609         elif instances.is_recent(domain):
1610             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1611             continue
1612
1613         logger.info("Fetching instances from domain='%s' ...", domain)
1614         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
             # Remember this domain so the duplicate check above can take effect
             domains.append(domain)
1615
1616     logger.debug("Success! - EXIT!")
1617     return 0
1618
1619 def fetch_relaylist(args: argparse.Namespace) -> int:
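    """
    Queries api.relaylist.com for known relays and fetches instance data for
    every wanted relay domain (taken from each entry's 'url') that is
    neither registered nor recently crawled.
    Returns 0 on success, non-zero on a too recent poll or an unexpected
    API response.
    """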
1620     logger.debug("args[]='%s' - CALLED!", type(args))
1621
1622     logger.debug("Invoking locking.acquire() ...")
1623     locking.acquire()
1624
1625     source_domain = "api.relaylist.com"
1626
1627     if sources.is_recent(source_domain):
1628         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1629         return 1
1630     else:
1631         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1632         sources.update(source_domain)
1633
1634     logger.info("Fetching list from source_domain='%s' ...", source_domain)
1635     fetched = network.get_json_api(
1636         source_domain,
1637         "/relays",
1638         {},
1639         (config.get("connection_timeout"), config.get("read_timeout"))
1640     )
1641     logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
1642
1643     if "error_message" in fetched:
1644         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1645         return 2
1646     elif "exception" in fetched:
1647         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1648         return 3
1649     elif "json" not in fetched:
1650         logger.warning("fetched has no element 'json' - EXIT!")
1651         return 4
1652
1653     domains = list()
1654
1655     logger.info("Checking %d row(s) ...", len(fetched["json"]))
1656     for row in fetched["json"]:
1657         logger.debug("row[]='%s'", type(row))
1658         domain = urlparse(row["url"]).netloc.lower().split(":")[0]
1659         logger.debug("domain='%s' - AFTER!", domain)
1660
1661         if domain in [None, ""]:
1662             logger.debug("domain='%s' is empty - SKIPPED!", domain)
1663             continue
1664
1665         logger.debug("domain='%s' - BEFORE!", domain)
1666         domain = domain.encode("idna").decode("utf-8")
1667         logger.debug("domain='%s' - AFTER!", domain)
1668
1669         if not domain_helper.is_wanted(domain):
1670             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1671             continue
1672         elif domain in domains:
1673             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1674             continue
1675         elif instances.is_registered(domain):
1676             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1677             continue
1678         elif instances.is_recent(domain):
1679             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1680             continue
1681
1682         logger.info("Fetching instances from domain='%s' ...", domain)
1683         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
             # Remember this domain so the duplicate check above can take effect
             domains.append(domain)
1684
1685     logger.debug("Success! - EXIT!")
1686     return 0
1687
1688 def fetch_relays(args: argparse.Namespace) -> int:
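    """
    Fetches peer lists from registered relays (activityrelay, aoderelay,
    selective-relay, pub-relay), optionally limited by args.domain or
    args.software. HTML front pages are scraped with BeautifulSoup, while
    pub-relay peers come from the nodeinfo 'metadata.peers' array. Newly
    discovered peers are then fetched as regular instances.
    Returns 0 on success.
    """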
1689     logger.debug("args[]='%s' - CALLED!", type(args))
1690
1691     logger.debug("Invoking locking.acquire() ...")
1692     locking.acquire()
1693
1694     if args.domain is not None and args.domain != "":
1695         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1696     elif args.software is not None and args.software != "":
1697         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
1698     else:
1699         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
1700
1701     domains = list()
1702     rows = database.cursor.fetchall()
1703
1704     logger.info("Checking %d relays ...", len(rows))
1705     for row in rows:
1706         logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1707         if not args.force and instances.is_recent(row["domain"]):
1708             logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1709             continue
1710
1711         peers = list()
1712         try:
1713             if row["software"] == "pub-relay":
1714                 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1715                 raw = network.fetch_api_url(
1716                     row["nodeinfo_url"],
1717                     (config.get("connection_timeout"), config.get("read_timeout"))
1718                 )
1719
1720                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1721                 if "exception" in raw:
1722                     logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1723                     raise raw["exception"]
1724                 elif "error_message" in raw:
1725                     logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1726                     instances.set_last_error(row["domain"], raw)
1727                     instances.set_last_instance_fetch(row["domain"])
1728                     instances.update(row["domain"])
1729                     continue
1730                 elif "json" not in raw:
1731                     logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1732                     continue
1733                 elif "metadata" not in raw["json"]:
1734                     logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
1735                     continue
1736                 elif "peers" not in raw["json"]["metadata"]:
1737                     logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
1738                     continue
1739             else:
1740                 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1741                 raw = utils.fetch_url(
1742                     f"https://{row['domain']}",
1743                     network.web_headers,
1744                     (config.get("connection_timeout"), config.get("read_timeout"))
1745                 ).text
1746                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1747
1748                 doc = bs4.BeautifulSoup(raw, features="html.parser")
1749                 logger.debug("doc[]='%s'", type(doc))
1750
1751         except network.exceptions as exception:
1752             logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1753             instances.set_last_error(row["domain"], exception)
1754             instances.set_last_instance_fetch(row["domain"])
1755             instances.update(row["domain"])
1756             continue
1757
1758         logger.debug("row[software]='%s'", row["software"])
1759         if row["software"] == "activityrelay":
1760             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1761             tags = doc.findAll("p")
1762
1763             logger.debug("Checking %d paragraphs ...", len(tags))
1764             for tag in tags:
1765                 logger.debug("tag[]='%s'", type(tag))
1766                 if len(tag.contents) == 0:
1767                     logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1768                     continue
1769                 elif "registered instances" not in tag.contents[0]:
1770                     logger.debug("Skipping paragraph, text not found.")
1771                     continue
1772
1773                 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1774                 for domain in tag.contents:
1775                     logger.debug("domain[%s]='%s'", type(domain), domain)
1776                     if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1777                         continue
1778
1779                     domain = str(domain)
1780                     logger.debug("domain='%s'", domain)
1781                     if not domain_helper.is_wanted(domain):
1782                         logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1783                         continue
1784
1785                     logger.debug("domain='%s' - BEFORE!", domain)
1786                     domain = tidyup.domain(domain) if domain not in [None, ""] else None
1787                     logger.debug("domain='%s' - AFTER!", domain)
1788
1789                     if domain in [None, ""]:
1790                         logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1791                         continue
1792                     elif domain not in peers:
1793                         logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1794                         peers.append(domain)
1795
1796                     logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1797                     if dict_helper.has_key(domains, "domain", domain):
1798                         logger.debug("domain='%s' already added", domain)
1799                         continue
1800
1801                     logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1802                     domains.append({
1803                         "domain": domain,
1804                         "origin": row["domain"],
1805                     })
1806         elif row["software"] in ["aoderelay", "selective-relay"]:
1807             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1808             if row["software"] == "aoderelay":
1809                 tags = doc.findAll("section", {"class": "instance"})
1810             else:
1811                 tags = doc.find("div", {"id": "instances"}).findAll("li")
1812
1813             logger.debug("Checking %d tags ...", len(tags))
1814             for tag in tags:
1815                 logger.debug("tag[]='%s'", type(tag))
1816
1817                 link = tag.find("a")
1818                 logger.debug("link[%s]='%s'", type(link), link)
1819                 if not isinstance(link, bs4.element.Tag):
1820                     logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1821                     continue
1822
1823                 components = urlparse(link.get("href"))
1824                 logger.debug("components(%d)='%s'", len(components), components)
1825                 domain = components.netloc.lower().split(":")[0]
1826
1827                 logger.debug("domain='%s' - BEFORE!", domain)
1828                 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1829                 logger.debug("domain='%s' - AFTER!", domain)
1830
1831                 if domain in [None, ""]:
1832                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1833                     continue
1834                 elif domain not in peers:
1835                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1836                     peers.append(domain)
1837
1838                 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1839                 if dict_helper.has_key(domains, "domain", domain):
1840                     logger.debug("domain='%s' already added", domain)
1841                     continue
1842
1843                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1844                 domains.append({
1845                     "domain": domain,
1846                     "origin": row["domain"],
1847                 })
1848         elif row["software"] == "pub-relay":
1849             logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1850             for domain in raw["json"]["metadata"]["peers"]:
1851                 logger.debug("domain='%s' - BEFORE!", domain)
1852                 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1853                 logger.debug("domain='%s' - AFTER!", domain)
1854
1855                 if domain in [None, ""]:
1856                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1857                     continue
1858                 elif domain not in peers:
1859                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1860                     peers.append(domain)
1861
1862                 logger.debug("domains()=%d,domain='%s'", len(domains), domain)
1863                 if dict_helper.has_key(domains, "domain", domain):
1864                     logger.debug("domain='%s' already added", domain)
1865                     continue
1866
1867                 logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1868                 domains.append({
1869                     "domain": domain,
1870                     "origin": row["domain"],
1871                 })
1872         else:
1873             logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1874             continue
1875
1876         logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1877         instances.set_last_instance_fetch(row["domain"])
1878
1879         logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1880         instances.set_total_peers(row["domain"], peers)
1881
1882         logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1883         instances.update(row["domain"])
1884
1885     logger.info("Checking %d domains ...", len(domains))
1886     for row in domains:
1887         logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1888         if not domain_helper.is_wanted(row["domain"]):
1889             logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1890             continue
1891         elif instances.is_registered(row["domain"]):
1892             logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1893             continue
1894
1895         logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1896         federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1897
1898     logger.debug("Success! - EXIT!")
1899     return 0
1900
1901 def convert_idna(args: argparse.Namespace) -> int:
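    """
    Converts all domain, origin, blocker and blocked columns that are not
    yet in punycode to their IDNA representation, in both the 'instances'
    and 'blocks' tables.
    Returns 0 on success.
    """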
1902     logger.debug("args[]='%s' - CALLED!", type(args))
1903
1904     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1905     rows = database.cursor.fetchall()
1906
1907     logger.debug("rows[]='%s'", type(rows))
1908     instances.translate_idnas(rows, "domain")
1909
1910     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1911     rows = database.cursor.fetchall()
1912
1913     logger.debug("rows[]='%s'", type(rows))
1914     instances.translate_idnas(rows, "origin")
1915
1916     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1917     rows = database.cursor.fetchall()
1918
1919     logger.debug("rows[]='%s'", type(rows))
1920     blocks.translate_idnas(rows, "blocker")
1921
1922     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1923     rows = database.cursor.fetchall()
1924
1925     logger.debug("rows[]='%s'", type(rows))
1926     blocks.translate_idnas(rows, "blocked")
1927
1928     logger.debug("Success! - EXIT!")
1929     return 0
1930
1931 def remove_invalid(args: argparse.Namespace) -> int:
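    """
    Deletes instances whose domain fails validation, along with any block
    records referencing them, then commits and vacuums the database.
    Returns 0 on success.
    """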
1932     logger.debug("args[]='%s' - CALLED!", type(args))
1933
1934     logger.debug("Invoking locking.acquire() ...")
1935     locking.acquire()
1936
1937     database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1938     rows = database.cursor.fetchall()
1939
1940     logger.info("Checking %d domains ...", len(rows))
1941     for row in rows:
1942         logger.debug("row[domain]='%s'", row["domain"])
1943         if not validators.domain(row["domain"].split("/")[0]):
1944             logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
1945             database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1946             database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
1947
1948     logger.debug("Invoking commit() ...")
1949     database.connection.commit()
1950
1951     logger.info("Vacuuming database ...")
1952     database.cursor.execute("VACUUM")
1953
1954     logger.debug("Success! - EXIT!")
1955     return 0