# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def check_instance(args: argparse.Namespace) -> int:
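    """Checks whether args.domain is valid, not blacklisted and not yet
    registered. Returns a distinct non-zero status code per rejection reason,
    or 0 when the domain is unknown."""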
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
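    """Scans all instances with a stored nodeinfo_url and reports those whose
    URL contains neither their domain nor its punycode form."""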
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt = cnt + 1

    logger.info("Found %d row(s)", cnt)

    logger.debug("EXIT!")
    return 0

def fetch_pixelfed_api(args: argparse.Namespace) -> int:
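    """Fetches the server list from the pixelfed.org API and runs
    federation.fetch_instances() on every new, wanted domain."""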
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF token is sent by default, so network.source_headers doesn't have to be added here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch JSON API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

def fetch_bkali(args: argparse.Namespace) -> int:
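    """Fetches a domain list from the gql.api.bka.li GraphQL API and runs
    federation.fetch_instances() on every new, wanted domain."""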
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for 'gql.api.bka.li' returned error_message='%s'", fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows(data)={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

def fetch_blocks(args: argparse.Namespace) -> int:
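    """Fetches and records block lists from registered instances, optionally
    limited to a single domain (args.domain) or software type (args.software).
    Attempts to deobfuscate obfuscated entries before recording them."""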
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:
        # Re-check all
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # chaos.social isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a Tor .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some friendica servers also obscure domains without a hash
                row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif block["blocked"].find("?") >= 0:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated = obfuscated + 1

                # Some obscure them with question marks; unclear whether that depends on the version
                row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated = deobfuscated + 1
                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("blocked='%s' is not registered, adding: blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
        instances.set_obfuscated_blocks(blocker, obfuscated)

        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_observer(args: argparse.Namespace) -> int:
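    """Scrapes fediverse.observer per software type and runs
    federation.fetch_instances() on every new, wanted domain."""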
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s'", software)

        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain not in [None, ""] else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_todon_wiki(args: argparse.Namespace) -> int:
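    """Fetches the silenced/limited and suspended server lists from
    wiki.todon.eu and records them as blocks by todon.eu."""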
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blockers = blocklist[block_level]

        logger.debug("block_level='%s',blockers()=%d", block_level, len(blockers))
        for blocked in blockers:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_cs(args: argparse.Namespace) -> int:
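    """Fetches chaos.social's federation.md from raw.githubusercontent.com and
    records the silenced and blocked instances as blocks by chaos.social."""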
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc()=%d[]='%s'", len(doc), type(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fba_rss(args: argparse.Namespace) -> int:
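    """Fetches an FBA-specific RSS feed (args.feed) and adds every new, wanted
    domain found in the item links."""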
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain not in [None, ""] else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fbabot_atom(args: argparse.Namespace) -> int:
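    """Fetches the Atom feed of an FBA bot account (ryona.agency by default,
    overridable through args.feed) and adds every new, wanted domain."""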
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href not in [None, ""] else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

def fetch_instances(args: argparse.Namespace) -> int:
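    """Fetches instance data for args.domain and, unless args.single is set,
    re-crawls known instances whose last fetch is older than the configured
    'recheck_instance' interval."""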
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    # Initial fetch
    try:
        logger.info("Fetching instances from domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

def fetch_csv(args: argparse.Namespace) -> int:
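    """Processes all CSV blocklists configured in blocklists.csv_files,
    optionally limited to a single blocker through args.domain."""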
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
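    """Downloads the oliphant blocklist CSV files from codeberg.org and
    processes each of them, optionally limited to a single blocker."""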
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to the blocker?
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

def fetch_txt(args: argparse.Namespace) -> int:
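    """Downloads all plain-text blocklists configured in blocklists.txt_files
    and processes every listed domain."""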
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
    for row in blocklists.txt_files:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain not in [None, ""] else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

def fetch_fedipact(args: argparse.Namespace) -> int:
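    """Scrapes the fedipact.online participant list and runs
    federation.fetch_instances() on every new, wanted domain."""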
1100     logger.debug("args[]='%s' - CALLED!", type(args))
1101
1102     logger.debug("Invoking locking.acquire() ...")
1103     locking.acquire()
1104
1105     source_domain = "fedipact.online"
1106     if sources.is_recent(source_domain):
1107         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
1108         return 1
1109     else:
1110         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1111         sources.update(source_domain)
1112
1113     logger.info("Fetching / from source_domain='%s' ...", source_domain)
1114     response = utils.fetch_url(
1115         f"https://{source_domain}",
1116         network.web_headers,
1117         (config.get("connection_timeout"), config.get("read_timeout"))
1118     )
1119
1120     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1121     if response.ok and response.status_code == 200 and response.text != "":
1122         logger.debug("Parsing %d Bytes ...", len(response.text))
1123
1124         doc = bs4.BeautifulSoup(response.text, "html.parser")
1125         logger.debug("doc[]='%s'", type(doc))
1126
1127         rows = doc.findAll("li")
1128         logger.info("Checking %d row(s) ...", len(rows))
1129         for row in rows:
1130             logger.debug("row[]='%s'", type(row))
1131             domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None
1132
1133             logger.debug("domain='%s' - AFTER!", domain)
1134             if domain is None or domain == "":
1135                 logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1136                 continue
1137
1138             logger.debug("domain='%s' - BEFORE!", domain)
1139             domain = domain.encode("idna").decode("utf-8")
1140             logger.debug("domain='%s' - AFTER!", domain)
1141
1142             if not domain_helper.is_wanted(domain):
1143                 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1144                 continue
1145             elif instances.is_registered(domain):
1146                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1147                 continue
1148             elif instances.is_recent(domain):
1149                 logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1150                 continue
1151
1152             logger.info("Fetching domain='%s' ...", domain)
1153             federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
1154
1155     logger.debug("Success! - EXIT!")
1156     return 0
1157
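# Illustrative sketch (not part of the original file): the idna round-trip
# used above converts internationalized domain names into their ASCII
# ("xn--") punycode form before any registration checks.
def _example_idna_roundtrip() -> str:
    domain = "bücher.example"
    # The non-ASCII label encodes to "xn--bcher-kva.example".
    punycode = domain.encode("idna").decode("utf-8")
    return punycode
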
1158 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1159     logger.debug("args[]='%s' - CALLED!", type(args))
1160
1161     logger.debug("Invoking locking.acquire() ...")
1162     locking.acquire()
1163
1164     source_domain = "instances.joinmobilizon.org"
1165     if sources.is_recent(source_domain):
1166         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1167         return 1
1168     else:
1169         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1170         sources.update(source_domain)
1171
1172     logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1173     raw = utils.fetch_url(
1174         f"https://{source_domain}/api/v1/instances",
1175         network.web_headers,
1176         (config.get("connection_timeout"), config.get("read_timeout"))
1177     ).text
1178     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1179
1180     parsed = json.loads(raw)
1181     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1182
1183     if "data" not in parsed:
1184         logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1185         return 1
1186
1187     logger.info("Checking %d instances ...", len(parsed["data"]))
1188     for row in parsed["data"]:
1189         logger.debug("row[]='%s'", type(row))
1190         if "host" not in row:
1191             logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1192             continue
1193         elif not domain_helper.is_wanted(row["host"]):
1194             logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1195             continue
1196         elif instances.is_registered(row["host"]):
1197             logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1198             continue
1199
1200         logger.info("Fetching row[host]='%s' ...", row["host"])
1201         federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1202
1203     logger.debug("Success! - EXIT!")
1204     return 0
1205
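# Illustrative sketch (not part of the original file): the last argument
# passed to federation.fetch_instances() above is the caller's own function
# name, recovered from the current stack frame -- a standalone demonstration:
def _example_own_name() -> str:
    # Returns "_example_own_name"; note currentframe() may return None on
    # non-CPython interpreters.
    return inspect.currentframe().f_code.co_name
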
1206 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1207     logger.debug("args[]='%s' - CALLED!", type(args))
1208
1209     logger.debug("Invoking locking.acquire() ...")
1210     locking.acquire()
1211
1212     source_domain = "instanceapp.misskey.page"
1213     if sources.is_recent(source_domain):
1214         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1215         return 1
1216     else:
1217         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1218         sources.update(source_domain)
1219
1220     logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1221     raw = utils.fetch_url(
1222         f"https://{source_domain}/instances.json",
1223         network.web_headers,
1224         (config.get("connection_timeout"), config.get("read_timeout"))
1225     ).text
1226     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1227
1228     parsed = json.loads(raw)
1229     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1230
1231     if "instancesInfos" not in parsed:
1232         logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1233         return 1
1234
1235     logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1236     for row in parsed["instancesInfos"]:
1237         logger.debug("row[%s]='%s'", type(row), row)
1238         if "url" not in row:
1239             logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1240             continue
1241         elif not domain_helper.is_wanted(row["url"]):
1242             logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1243             continue
1244         elif instances.is_registered(row["url"]):
1245             logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1246             continue
1247
1248         logger.info("Fetching row[url]='%s' ...", row["url"])
1249         federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1250
1251     logger.debug("Success! - EXIT!")
1252     return 0
1253
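# Illustrative sketch (not part of the original file): both JSON sources
# above are handled with the same defensive pattern -- parse, check the
# top-level key, then skip rows lacking the expected field.  The response
# shape here is an assumption for demonstration only.
def _example_parse_instance_list(raw: str, list_key: str, host_key: str) -> list:
    parsed = json.loads(raw)
    return [row[host_key] for row in parsed.get(list_key, []) if host_key in row]
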
1254 def recheck_obfuscation(args: argparse.Namespace) -> int:
1255     logger.debug("args[]='%s' - CALLED!", type(args))
1256
1257     logger.debug("Invoking locking.acquire() ...")
1258     locking.acquire()
1259
1260     if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1261         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1262     elif isinstance(args.software, str) and args.software != "":
1263         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1264     else:
1265         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1266
1267     rows = database.cursor.fetchall()
1268     logger.info("Checking %d domains ...", len(rows))
1269     for row in rows:
1270         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1271         if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1272             logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1273             continue
1274
1275         logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1276         blocking = federation.fetch_blocks(row["domain"])
1277
1278         logger.debug("blocking()=%d", len(blocking))
1279         if len(blocking) == 0:
1280             if row["software"] == "pleroma":
1281                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1282                 blocking = pleroma.fetch_blocks(row["domain"])
1283             elif row["software"] == "mastodon":
1284                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1285                 blocking = mastodon.fetch_blocks(row["domain"])
1286             elif row["software"] == "lemmy":
1287                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1288                 blocking = lemmy.fetch_blocks(row["domain"])
1289             elif row["software"] == "friendica":
1290                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1291                 blocking = friendica.fetch_blocks(row["domain"])
1292             elif row["software"] == "misskey":
1293                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1294                 blocking = misskey.fetch_blocks(row["domain"])
1295             else:
1296                 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1297
1298         # chaos.social ("c.s") isn't part of oliphant's "hidden" blocklists
1299         logger.debug("row[domain]='%s'", row["domain"])
1300         if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1301             logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
1302             instances.set_last_blocked(row["domain"])
1303             instances.set_total_blocks(row["domain"], blocking)
1304
1305         obfuscated = 0
1306         blockdict = list()
1307
1308         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1309         for block in blocking:
1310             logger.debug("block[blocked]='%s'", block["blocked"])
1311             blocked = None
1312
1313             if block["blocked"] == "":
1314                 logger.debug("block[blocked] is empty - SKIPPED!")
1315                 continue
1316             elif block["blocked"].endswith(".arpa"):
1317                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1318                 continue
1319             elif block["blocked"].endswith(".tld"):
1320                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1321                 continue
1322             elif block["blocked"].endswith(".onion"):
1323                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1324                 continue
1325             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1326                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1327                 obfuscated = obfuscated + 1
1328                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None)
1329             elif not domain_helper.is_wanted(block["blocked"]):
1330                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1331                 continue
1332             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1333                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1334                 continue
1335
1336             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1337             if blocked is not None and blocked != block["blocked"]:
1338                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1339                 obfuscated = obfuscated - 1
1340
1341                 if blacklist.is_blacklisted(blocked):
1342                     logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1343                     continue
1344                 elif blacklist.is_blacklisted(row["domain"]):
1345                     logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1346                     continue
1347                 elif blocks.is_instance_blocked(row["domain"], blocked):
1348                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1349                     continue
1350
1351                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1352
1353                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1354                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1355                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
1356                     blockdict.append({
1357                         "blocked": blocked,
1358                         "reason" : block["reason"],
1359                     })
1360
1361         logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1362         instances.set_obfuscated_blocks(row["domain"], obfuscated)
1363
1364         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1365         if instances.has_pending(row["domain"]):
1366             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1367             instances.update(row["domain"])
1368
1369         logger.debug("Invoking commit() ...")
1370         database.connection.commit()
1371
1372         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1373         if config.get("bot_enabled") and len(blockdict) > 0:
1374             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1375             network.send_bot_post(row["domain"], blockdict)
1376
1377     logger.debug("Success! - EXIT!")
1378     return 0
1379
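# Illustrative sketch (not part of the original file): one way an obfuscated
# entry such as "exa*ple.com" can be resolved is by comparing the SHA-256
# digest of every known domain against the digest some blockers publish.
# utils.deobfuscate() is the real implementation; this standalone sketch
# only demonstrates the digest-matching idea with hypothetical inputs.
def _example_match_digest(candidates: list, digest: str):
    import hashlib
    for candidate in candidates:
        if hashlib.sha256(candidate.encode("utf-8")).hexdigest() == digest:
            return candidate
    return None
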
1380 def fetch_fedilist(args: argparse.Namespace) -> int:
1381     logger.debug("args[]='%s' - CALLED!", type(args))
1382
1383     logger.debug("Invoking locking.acquire() ...")
1384     locking.acquire()
1385
1386     source_domain = "demo.fedilist.com"
1387     if sources.is_recent(source_domain):
1388         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1389         return 1
1390     else:
1391         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1392         sources.update(source_domain)
1393
1394     url = f"http://{source_domain}/instance/csv?onion=not"
1395     if args.software is not None and args.software != "":
1396         logger.debug("args.software='%s'", args.software)
1397         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1398
1399     logger.info("Fetching url='%s' ...", url)
1400     response = reqto.get(
1401         url,
1402         headers=network.web_headers,
1403         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1404         allow_redirects=False
1405     )
1406
1407     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1408     if not response.ok or response.status_code > 200 or len(response.content) == 0:
1409         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1410         return 1
1411
1412     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1413
1414     logger.debug("reader[]='%s'", type(reader))
1415     if reader is None:
1416         logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1417         return 2
1418
1419     rows = list(reader)
1420
1421     logger.info("Checking %d rows ...", len(rows))
1422     for row in rows:
1423         logger.debug("row[]='%s'", type(row))
1424         if "hostname" not in row:
1425             logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1426             continue
1427
1428         logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1429         domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
1430         logger.debug("domain='%s' - AFTER!", domain)
1431
1432         if domain is None or domain == "":
1433             logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1434             continue
1435
1436         logger.debug("domain='%s' - BEFORE!", domain)
1437         domain = domain.encode("idna").decode("utf-8")
1438         logger.debug("domain='%s' - AFTER!", domain)
1439
1440         if not domain_helper.is_wanted(domain):
1441             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1442             continue
1443         elif (args.force is None or not args.force) and instances.is_registered(domain):
1444             logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1445             continue
1446         elif instances.is_recent(domain):
1447             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1448             continue
1449
1450         logger.info("Fetching instances from domain='%s' ...", domain)
1451         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1452
1453     logger.debug("Success! - EXIT!")
1454     return 0
1455
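# Illustrative sketch (not part of the original file): csv.DictReader maps
# every data row onto a dict keyed by the CSV header line, which is why the
# loop above can test each row for a "hostname" key.  Hypothetical data:
def _example_read_hostnames() -> list:
    lines = ["hostname,software", "example.social,mastodon"]
    return [row["hostname"] for row in csv.DictReader(lines, dialect="unix")]
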
1456 def update_nodeinfo(args: argparse.Namespace) -> int:
1457     logger.debug("args[]='%s' - CALLED!", type(args))
1458
1459     logger.debug("Invoking locking.acquire() ...")
1460     locking.acquire()
1461
1462     if args.domain is not None and args.domain != "":
1463         logger.debug("Fetching args.domain='%s'", args.domain)
1464         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1465     elif args.software is not None and args.software != "":
1466         logger.info("Fetching domains for args.software='%s'", args.software)
1467         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1468     elif args.mode is not None and args.mode != "":
1469         logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1470         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
1471     elif args.no_software:
1472         logger.info("Fetching domains with no software type detected ...")
1473         database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1474     elif args.no_auto:
1475         logger.info("Fetching domains with a detection mode other than AUTO_DISCOVERY ...")
1476         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1477     elif args.no_detection:
1478         logger.info("Fetching domains with no detection mode being set ...")
1479         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC")
1480     else:
1481         logger.info("Fetching all domains, least recently updated first ...")
1482         database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1483
1484     domains = database.cursor.fetchall()
1485
1486     logger.info("Checking %d domain(s) ...", len(domains))
1487     cnt = 0
1488     for row in domains:
1489         logger.debug("row[]='%s'", type(row))
1490         if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1491             logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1492             continue
1493
1494         try:
1495             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1496             software = federation.determine_software(row["domain"])
1497
1498             logger.debug("Determined software='%s'", software)
1499             if (software != row["software"] and software is not None) or args.force is True:
1500                 logger.debug("software='%s'", software)
1501                 if software is None:
1502                     logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1503                     instances.set_nodeinfo_url(row["domain"], None)
1504
1505                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1506                 instances.set_software(row["domain"], software)
1507
1508             if software is not None:
1509                 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1510                 instances.set_success(row["domain"])
1511         except network.exceptions as exception:
1512             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1513             instances.set_last_error(row["domain"], exception)
1514
1515         instances.set_last_nodeinfo(row["domain"])
1516         instances.update(row["domain"])
1517         cnt = cnt + 1
1518
1519     logger.debug("Success! - EXIT!")
1520     return 0
1521
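# Illustrative sketch (not part of the original file): the update rule above,
# reduced to a pure function -- the stored software type is rewritten when
# the newly detected value differs, or when --force was given.
def _example_needs_update(stored, detected, force: bool) -> bool:
    return (detected is not None and detected != stored) or force is True
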
1522 def fetch_instances_social(args: argparse.Namespace) -> int:
1523     logger.debug("args[]='%s' - CALLED!", type(args))
1524
1525     logger.debug("Invoking locking.acquire() ...")
1526     locking.acquire()
1527
1528     source_domain = "instances.social"
1529
1530     if config.get("instances_social_api_key") == "":
1531         logger.error("API key not set. Please set in your config.json file.")
1532         return 1
1533     elif sources.is_recent(source_domain):
1534         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1535         return 2
1536     else:
1537         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1538         sources.update(source_domain)
1539
1540     headers = {
1541         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1542     }
1543
1544     logger.info("Fetching list from source_domain='%s' ...", source_domain)
1545     fetched = network.get_json_api(
1546         source_domain,
1547         "/api/1.0/instances/list?count=0&sort_by=name",
1548         headers,
1549         (config.get("connection_timeout"), config.get("read_timeout"))
1550     )
1551     logger.debug("fetched[]='%s'", type(fetched))
1552
1553     if "error_message" in fetched:
1554         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1555         return 2
1556     elif "exception" in fetched:
1557         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1558         return 3
1559     elif "json" not in fetched:
1560         logger.warning("fetched has no element 'json' - EXIT!")
1561         return 4
1562     elif "instances" not in fetched["json"]:
1563         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1564         return 5
1565
1566     domains = list()
1567     rows = fetched["json"]["instances"]
1568
1569     logger.info("Checking %d row(s) ...", len(rows))
1570     for row in rows:
1571         logger.debug("row[]='%s'", type(row))
1572         domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
1573         logger.debug("domain='%s' - AFTER!", domain)
1574
1575         if domain is None or domain == "":
1576             logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1577             continue
1578
1579         logger.debug("domain='%s' - BEFORE!", domain)
1580         domain = domain.encode("idna").decode("utf-8")
1581         logger.debug("domain='%s' - AFTER!", domain)
1582
1583         if not domain_helper.is_wanted(domain):
1584             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1585             continue
1586         elif domain in domains:
1587             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1588             continue
1589         elif instances.is_registered(domain):
1590             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1591             continue
1592         elif instances.is_recent(domain):
1593             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1594             continue
1595
1596         logger.info("Fetching instances from domain='%s'", domain)
1597         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1598
1599     logger.debug("Success! - EXIT!")
1600     return 0
1601
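# Illustrative sketch (not part of the original file): instances.social
# expects a Bearer token, so the request headers reduce to a single
# Authorization entry built from the configured API key.
def _example_auth_headers(api_key: str) -> dict:
    return {
        "Authorization": f"Bearer {api_key}",
    }
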
1602 def fetch_relays(args: argparse.Namespace) -> int:
1603     logger.debug("args[]='%s' - CALLED!", type(args))
1604
1605     logger.debug("Invoking locking.acquire() ...")
1606     locking.acquire()
1607
1608     if args.domain is not None and args.domain != "":
1609         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1610     elif args.software is not None and args.software != "":
1611         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
1612     else:
1613         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
1614
1615     domains = list()
1616     rows = database.cursor.fetchall()
1617
1618     logger.info("Checking %d relays ...", len(rows))
1619     for row in rows:
1620         logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1621         peers = list()
1622         if not args.force and instances.is_recent(row["domain"]):
1623             logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1624             continue
1625
1626         try:
1627             if row["software"] == "pub-relay":
1628                 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1629                 raw = network.fetch_api_url(
1630                     row["nodeinfo_url"],
1631                     (config.get("connection_timeout"), config.get("read_timeout"))
1632                 )
1633
1634                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1635                 if "exception" in raw:
1636                     logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1637                     raise raw["exception"]
1638                 elif "error_message" in raw:
1639                     logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1640                     instances.set_last_error(row["domain"], raw)
1641                     instances.set_last_instance_fetch(row["domain"])
1642                     instances.update(row["domain"])
1643                     continue
1644                 elif "json" not in raw:
1645                     logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1646                     continue
1647                 elif "metadata" not in raw["json"]:
1648                     logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
1649                     continue
1650                 elif "peers" not in raw["json"]["metadata"]:
1651                     logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
1652                     continue
1653             else:
1654                 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1655                 raw = utils.fetch_url(
1656                     f"https://{row['domain']}",
1657                     network.web_headers,
1658                     (config.get("connection_timeout"), config.get("read_timeout"))
1659                 ).text
1660                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1661
1662                 doc = bs4.BeautifulSoup(raw, features="html.parser")
1663                 logger.debug("doc[]='%s'", type(doc))
1664
1665         except network.exceptions as exception:
1666             logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1667             instances.set_last_error(row["domain"], exception)
1668             instances.set_last_instance_fetch(row["domain"])
1669             instances.update(row["domain"])
1670             continue
1671
1672         logger.debug("row[software]='%s'", row["software"])
1673         if row["software"] == "activityrelay":
1674             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1675             tags = doc.findAll("p")
1676
1677             logger.debug("Checking %d paragraphs ...", len(tags))
1678             for tag in tags:
1679                 logger.debug("tag[]='%s'", type(tag))
1680                 if len(tag.contents) == 0:
1681                     logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1682                     continue
1683                 elif "registered instances" not in tag.contents[0]:
1684                     logger.debug("Skipping paragraph, text not found.")
1685                     continue
1686
1687                 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1688                 for domain in tag.contents:
1689                     logger.debug("domain[%s]='%s'", type(domain), domain)
1690                     if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1691                         continue
1692
1693                     domain = str(domain)
1694                     logger.debug("domain='%s'", domain)
1695                     if not domain_helper.is_wanted(domain):
1696                         logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1697                         continue
1698
1699                     logger.debug("domain='%s' - BEFORE!", domain)
1700                     domain = tidyup.domain(domain) if domain not in [None, ""] else None
1701                     logger.debug("domain='%s' - AFTER!", domain)
1702
1703                     if domain is None or domain == "":
1704                         logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1705                         continue
1706                     elif domain not in peers:
1707                         logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1708                         peers.append(domain)
1709
1710                     if dict_helper.has_key(domains, "domain", domain):
1711                         logger.debug("domain='%s' already added", domain)
1712                         continue
1713
1714                     logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1715                     domains.append({
1716                         "domain": domain,
1717                         "origin": row["domain"],
1718                     })
1719         elif row["software"] in ["aoderelay", "selective-relay"]:
1720             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1721             if row["software"] == "aoderelay":
1722                 tags = doc.findAll("section", {"class": "instance"})
1723             else:
1724                 tags = doc.find("div", {"id": "instances"}).findAll("li")
1725
1726             logger.debug("Checking %d tags ...", len(tags))
1727             for tag in tags:
1728                 logger.debug("tag[]='%s'", type(tag))
1729
1730                 link = tag.find("a")
1731                 logger.debug("link[%s]='%s'", type(link), link)
1732                 if not isinstance(link, bs4.element.Tag):
1733                     logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
1734                     continue
1735
1736                 components = urlparse(link.get("href"))
1737                 logger.debug("components(%d)='%s'", len(components), components)
1738                 domain = components.netloc.lower().split(":")[0]
1739
1740                 logger.debug("domain='%s' - BEFORE!", domain)
1741                 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1742                 logger.debug("domain='%s' - AFTER!", domain)
1743
1744                 if domain is None or domain == "":
1745                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1746                     continue
1747                 elif domain not in peers:
1748                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1749                     peers.append(domain)
1750
1751                 if dict_helper.has_key(domains, "domain", domain):
1752                     logger.debug("domain='%s' already added", domain)
1753                     continue
1754
1755                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1756                 domains.append({
1757                     "domain": domain,
1758                     "origin": row["domain"],
1759                 })
1760         elif row["software"] == "pub-relay":
1761             logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
1762             for domain in raw["json"]["metadata"]["peers"]:
1763                 logger.debug("domain='%s' - BEFORE!", domain)
1764                 domain = tidyup.domain(domain) if domain not in [None, ""] else None
1765                 logger.debug("domain='%s' - AFTER!", domain)
1766
1767                 if domain is None or domain == "":
1768                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1769                     continue
1770                 elif domain not in peers:
1771                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1772                     peers.append(domain)
1773
1774                 if dict_helper.has_key(domains, "domain", domain):
1775                     logger.debug("domain='%s' already added", domain)
1776                     continue
1777
1778                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1779                 domains.append({
1780                     "domain": domain,
1781                     "origin": row["domain"],
1782                 })
1783         else:
1784             logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1785             continue
1786
1787         logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1788         instances.set_last_instance_fetch(row["domain"])
1789
1790         logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1791         instances.set_total_peers(row["domain"], peers)
1792
1793         logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1794         instances.update(row["domain"])
1795
1796     logger.info("Checking %d domains ...", len(domains))
1797     for row in domains:
1798         logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1799         if not domain_helper.is_wanted(row["domain"]):
1800             logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1801             continue
1802         elif instances.is_registered(row["domain"]):
1803             logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1804             continue
1805
1806         logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1807         federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1808
1809     logger.debug("Success! - EXIT!")
1810     return 0
1811
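# Illustrative sketch (not part of the original file): extracting the bare
# host name from a relay's instance link, as done for aoderelay and
# selective-relay above -- urlparse() splits the URL, netloc may still carry
# a port, and lower-casing normalizes the result.
def _example_host_from_href(href: str) -> str:
    components = urlparse(href)
    # "https://Example.Social:443/inbox" yields "example.social"
    return components.netloc.lower().split(":")[0]
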
1812 def convert_idna(args: argparse.Namespace) -> int:
1813     logger.debug("args[]='%s' - CALLED!", type(args))
1814
1815     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1816     rows = database.cursor.fetchall()
1817
1818     logger.debug("rows[]='%s'", type(rows))
1819     instances.translate_idnas(rows, "domain")
1820
1821     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1822     rows = database.cursor.fetchall()
1823
1824     logger.debug("rows[]='%s'", type(rows))
1825     instances.translate_idnas(rows, "origin")
1826
1827     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1828     rows = database.cursor.fetchall()
1829
1830     logger.debug("rows[]='%s'", type(rows))
1831     blocks.translate_idnas(rows, "blocker")
1832
1833     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1834     rows = database.cursor.fetchall()
1835
1836     logger.debug("rows[]='%s'", type(rows))
1837     blocks.translate_idnas(rows, "blocked")
1838
1839     logger.debug("Success! - EXIT!")
1840     return 0
1841
1842 def remove_invalid(args: argparse.Namespace) -> int:
1843     logger.debug("args[]='%s' - CALLED!", type(args))
1844
1845     logger.debug("Invoking locking.acquire() ...")
1846     locking.acquire()
1847
1848     database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1849     rows = database.cursor.fetchall()
1850
1851     logger.info("Checking %d domains ...", len(rows))
1852     for row in rows:
1853         logger.debug("row[domain]='%s'", row["domain"])
1854         if not validators.domain(row["domain"].split("/")[0]):
1855             logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
1856             database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1857             database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
1858
1859     logger.debug("Invoking commit() ...")
1860     database.connection.commit()
1861
1862     logger.info("Vacuuming database ...")
1863     database.cursor.execute("VACUUM")
1864
1865     logger.debug("Success! - EXIT!")
1866     return 0
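
# Illustrative sketch (not part of the original file): validators.domain()
# returns a truthy value only for syntactically valid domain names, so
# splitting on "/" first (as above) catches rows that accidentally captured
# a path component.
def _example_is_valid_row(row_domain: str) -> bool:
    return bool(validators.domain(row_domain.split("/")[0]))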