# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import csv
import inspect
import json
import logging
import time

from urllib.parse import urlparse

import argparse
import atoma
import bs4
import markdown
import reqto
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import dicts as dict_helper
from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import blocks
from fba.models import instances
from fba.models import sources

from fba.networks import friendica
from fba.networks import lemmy
from fba.networks import mastodon
from fba.networks import misskey
from fba.networks import pleroma

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

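# Note on return values: each command handler below returns an exit status.
# As far as these functions show, 0 means success, 1 means a data source was
# queried too recently, and values >= 100 signal validation or network errors;
# the exact meaning of each code is local to its function.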
def check_instance(args: argparse.Namespace) -> int:
    logger.debug("args.domain='%s' - CALLED!", args.domain)

    status = 0
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid", args.domain)
        status = 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted", args.domain)
        status = 101
    elif instances.is_registered(args.domain):
        logger.warning("args.domain='%s' is already registered", args.domain)
        status = 102
    else:
        logger.info("args.domain='%s' is not known", args.domain)

    logger.debug("status=%d - EXIT!", status)
    return status

def check_nodeinfo(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # Fetch rows
    database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")

    cnt = 0
    for row in database.cursor.fetchall():
        logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        punycode = row["domain"].encode("idna").decode("utf-8")

        if row["nodeinfo_url"].startswith("/"):
            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
            continue
        elif punycode not in row["nodeinfo_url"] and row["domain"] not in row["nodeinfo_url"]:
            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
            cnt += 1

    logger.info("Found %d mismatching row(s)", cnt)

    logger.debug("EXIT!")
    return 0

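# Fetches the public server list from the pixelfed.org API and registers any
# previously unknown instances.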
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    # No CSRF by default, so there is no need to add network.source_headers yourself here
    headers = tuple()
    source_domain = "pixelfed.org"

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    try:
        logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
        headers = csrf.determine(source_domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
        return 100

    try:
        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("JSON API returned %d elements", len(fetched))
        if "error_message" in fetched:
            logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
            return 101
        elif "data" not in fetched["json"]:
            logger.warning("API did not return JSON with 'data' element - EXIT!")
            return 102

        rows = fetched["json"]["data"]
        logger.info("Checking %d fetched rows ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            if "domain" not in row:
                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                continue
            elif row["domain"] == "":
                logger.debug("row[domain] is empty - SKIPPED!")
                continue

            logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
            domain = row["domain"].encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Fetching instances from domain='%s' ...", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    except network.exceptions as exception:
        logger.warning("Cannot fetch pixelfed.org API,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 103

    logger.debug("Success! - EXIT!")
    return 0

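# Queries the GraphQL endpoint at gql.api.bka.li for its list of known domains
# and fetches instance data for each new one.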
def fetch_bkali(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "gql.api.bka.li"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()
    try:
        logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
        fetched = network.post_json_api(
            source_domain,
            "/v1/graphql",
            json.dumps({
                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
            })
        )

        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for source_domain='%s' returned error_message='%s'", source_domain, fetched["error_message"])
            return 100
        elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
            logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
            return 101

        rows = fetched["json"]

        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        if len(rows) == 0:
            raise Exception("WARNING: Returned no records")
        elif "data" not in rows:
            raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'")
        elif "nodeinfo" not in rows["data"]:
            raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")

        for entry in rows["data"]["nodeinfo"]:
            logger.debug("entry[%s]='%s'", type(entry), entry)
            if "domain" not in entry:
                logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                continue
            elif entry["domain"] == "":
                logger.debug("entry[domain] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(entry["domain"]):
                logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
                continue
            elif instances.is_registered(entry["domain"]):
                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
                continue
            elif instances.is_recent(entry["domain"]):
                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                continue

            logger.debug("Adding domain='%s' ...", entry["domain"])
            domains.append(entry["domain"])

    except network.exceptions as exception:
        logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
        return 102

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success - EXIT!")
    return 0

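# Fetches and stores the blocklists of known blockers. For each selected
# instance the generic federation API is tried first; if that yields nothing,
# a software-specific scraper (pleroma, mastodon, lemmy, friendica, misskey)
# is used. Obfuscated entries (containing '*' or '?') are resolved against
# already-known domains where possible.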
def fetch_blocks(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
    if args.domain is not None and args.domain != "":
        logger.debug("args.domain='%s' - checking ...", args.domain)
        if not validators.domain(args.domain):
            logger.warning("args.domain='%s' is not valid.", args.domain)
            return 100
        elif blacklist.is_blacklisted(args.domain):
            logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
            return 101
        elif not instances.is_registered(args.domain):
            logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
            return 102

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        # Re-check single domain
        logger.debug("Querying database for args.domain='%s' ...", args.domain)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
        )
    elif args.software is not None and args.software != "":
        # Re-check single software
        logger.debug("Querying database for args.software='%s' ...", args.software)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
        )
    elif args.force:
        # Re-check all
        logger.debug("Re-checking all instances ...")
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
        )
    else:
        # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
        )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for blocker, software, origin, nodeinfo_url in rows:
        logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)

        if not domain_helper.is_wanted(blocker):
            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
            continue

        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
        instances.set_last_blocked(blocker)
        instances.set_has_obfuscation(blocker, False)

        # chaos.social isn't part of oliphant's "hidden" blocklists
        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant or fetch_csv instead!", blocker)
            continue

        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
        blocking = federation.fetch_blocks(blocker)

        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
        if len(blocking) == 0:
            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
            if software == "pleroma":
                blocking = pleroma.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "mastodon":
                blocking = mastodon.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "lemmy":
                blocking = lemmy.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "friendica":
                blocking = friendica.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            elif software == "misskey":
                blocking = misskey.fetch_blocks(blocker)
                logger.info("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
            else:
                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)

        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
        instances.set_total_blocks(blocker, blocking)

        blockdict = list()
        deobfuscated = obfuscated = 0

        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
        for block in blocking:
            logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])

            if block["block_level"] == "":
                logger.warning("block_level is empty, blocker='%s',blocked='%s'", block["blocker"], block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s' - BEFORE!", block["blocked"], block["reason"])
            block["blocked"] = tidyup.domain(block["blocked"])
            block["reason"]  = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])

            if block["blocked"] == "":
                logger.warning("blocked is empty, blocker='%s'", blocker)
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a Tor .onion domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse DNS (.arpa) domain - SKIPPED", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED", block["blocked"])
                continue
            elif "*" in block["blocked"]:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated += 1

                # Some friendica servers also obscure domains without hash
                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated += 1
                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]
            elif "?" in block["blocked"]:
                logger.debug("blocker='%s' uses obfuscated domains", blocker)
                instances.set_has_obfuscation(blocker, True)
                obfuscated += 1

                # Some obscure them with question marks, not sure if that's dependent on version or not
                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)

                logger.debug("row[]='%s'", type(row))
                if row is None:
                    logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
                    continue

                deobfuscated += 1
                block["blocked"] = row["domain"]
                origin           = row["origin"]
                nodeinfo_url     = row["nodeinfo_url"]

            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue

            logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
            block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
            logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

            if not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif block["block_level"] in ["accept", "accepted"]:
                logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
                continue
            elif not instances.is_registered(block["blocked"]):
                logger.debug("blocked='%s' is not registered, adding: blocker='%s'", block["blocked"], blocker)
                federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)

            block["block_level"] = blocks.alias_block_level(block["block_level"])

            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

            logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
            cookies.clear(block["blocked"])

        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)

        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("Invoking cookies.clear(%s) ...", blocker)
        cookies.clear(blocker)

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0

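# Scrapes fediverse.observer: discovers the software types from the site's
# navigation bar (or uses args.software) and imports every instance listed in
# the per-software table data.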
def fetch_observer(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fediverse.observer"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    types = list()
    if args.software is None:
        logger.info("Fetching software list ...")
        raw = utils.fetch_url(
            f"https://{source_domain}",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        ).text
        logger.debug("raw[%s]()=%d", type(raw), len(raw))

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
        logger.debug("navbar[]='%s'", type(navbar))
        if navbar is None:
            logger.warning("Cannot find navigation bar, cannot continue!")
            return 1

        items = navbar.findAll("a", {"class": "dropdown-item"})
        logger.debug("items[]='%s'", type(items))

        logger.info("Checking %d menu items ...", len(items))
        for item in items:
            logger.debug("item[%s]='%s'", type(item), item)
            if item.text.lower() == "all":
                logger.debug("Skipping 'All' menu entry ...")
                continue

            logger.debug("Appending item.text='%s' ...", item.text)
            types.append(tidyup.domain(item.text))
    else:
        logger.info("Adding args.software='%s' as type ...", args.software)
        types.append(args.software)

    logger.info("Fetching table data for %d software type(s) ...", len(types))
    for software in types:
        logger.debug("software='%s' - BEFORE!", software)
        if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue

        doc = None
        try:
            logger.debug("Fetching table data for software='%s' ...", software)
            raw = utils.fetch_url(
                f"https://{source_domain}/app/views/tabledata.php?software={software}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))

            doc = bs4.BeautifulSoup(raw, features="html.parser")
            logger.debug("doc[]='%s'", type(doc))
        except network.exceptions as exception:
            logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
            continue

        items = doc.findAll("a", {"class": "url"})
        logger.info("Checking %d items,software='%s' ...", len(items), software)
        for item in items:
            logger.debug("item[]='%s'", type(item))
            domain = item.decode_contents()
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None
            logger.debug("domain='%s' - AFTER!", domain)

            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue

            software = software_helper.alias(software)
            logger.info("Fetching instances for domain='%s'", domain)
            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

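# Imports the silenced and suspended server lists published in the todon.eu
# wiki and records them as blocks by todon.eu.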
def fetch_todon_wiki(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "wiki.todon.eu"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    blocklist = {
        "silenced": list(),
        "reject": list(),
    }

    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/todon/domainblocks",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    silenced = doc.find("h3", {"id": "silencedlimited_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d silenced/limited entries ...", len(silenced))
    blocklist["silenced"] = utils.find_domains(silenced, "div")

    suspended = doc.find("h3", {"id": "suspended_servers"}).find_next("ul").findAll("li")
    logger.info("Checking %d suspended entries ...", len(suspended))
    blocklist["reject"] = utils.find_domains(suspended, "div")

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "todon.eu"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    blockdict = list()
    for block_level in blocklist:
        blocked_domains = blocklist[block_level]

        logger.debug("block_level='%s',blocked_domains()=%d", block_level, len(blocked_domains))
        for blocked in blocked_domains:
            logger.debug("blocked='%s'", blocked)

            if not instances.is_registered(blocked):
                try:
                    logger.info("Fetching instances from domain='%s' ...", blocked)
                    federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
                except network.exceptions as exception:
                    logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
                    instances.set_last_error(blocked, exception)

            if not domain_helper.is_wanted(blocked):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue
            elif not domain_helper.is_wanted(blocker):
                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
                continue
            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                continue

            logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
            if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
                logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", blocked, block_level, blocker)
                blockdict.append({
                    "blocked": blocked,
                    "reason" : None,
                })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

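# Imports chaos.social's blocklist, which is published as a Markdown document
# (federation.md) and rendered to HTML here before its tables are parsed.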
def fetch_cs(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    extensions = [
        "extra",
        "abbr",
        "attr_list",
        "def_list",
        "fenced_code",
        "footnotes",
        "md_in_html",
        "admonition",
        "codehilite",
        "legacy_attrs",
        "legacy_em",
        "meta",
        "nl2br",
        "sane_lists",
        "smarty",
        "toc",
        "wikilinks"
    ]

    blocklist = {
        "silenced": list(),
        "reject"  : list(),
    }

    source_domain = "raw.githubusercontent.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/chaossocial/meta/master/federation.md",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
    logger.debug("doc[%s]()=%d", type(doc), len(doc))

    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
    logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
    blocklist["silenced"] = federation.find_domains(silenced)

    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
    logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
    blocklist["reject"] = federation.find_domains(blocked)

    blocking = blocklist["silenced"] + blocklist["reject"]
    blocker = "chaos.social"

    logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
    instances.set_last_blocked(blocker)
    instances.set_total_blocks(blocker, blocking)

    logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
    if len(blocking) > 0:
        blockdict = list()
        for block_level in blocklist:
            logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))

            for row in blocklist[block_level]:
                logger.debug("row[%s]='%s'", type(row), row)
                if "domain" not in row:
                    logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
                    continue
                elif not instances.is_registered(row["domain"]):
                    try:
                        logger.info("Fetching instances from domain='%s' ...", row["domain"])
                        federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
                    except network.exceptions as exception:
                        logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                        instances.set_last_error(row["domain"], exception)

                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',block_level='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                    blockdict.append({
                        "blocked": row["domain"],
                        "reason" : row["reason"],
                    })

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
    if instances.has_pending(blocker):
        logger.debug("Flushing updates for blocker='%s' ...", blocker)
        instances.update(blocker)

    logger.debug("Success! - EXIT!")
    return 0

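# Reads an FBA-specific RSS feed (args.feed) and queues every new domain found
# in the item links.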
def fetch_fba_rss(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    domains = list()

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    components = urlparse(args.feed)
    domain = components.netloc.lower().split(":")[0]

    logger.debug("domain='%s'", domain)
    if sources.is_recent(domain):
        logger.info("API from domain='%s' has recently been accessed - EXIT!", domain)
        return 0
    else:
        logger.debug("domain='%s' has not been recently used, marking ...", domain)
        sources.update(domain)

    logger.info("Fetching FBA-specific RSS args.feed='%s' ...", args.feed)
    response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
        rss = atoma.parse_rss_bytes(response.content)

        logger.debug("rss[]='%s'", type(rss))
        for item in rss.items:
            logger.debug("item[%s]='%s'", type(item), item)
            domain = item.link.split("=")[1]
            domain = tidyup.domain(domain) if domain is not None and domain != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif domain in domains:
                logger.debug("domain='%s' is already added - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.debug("Adding domain='%s'", domain)
            domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fba_rss) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

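# Parses the Atom feed of the FBA bot account (ryona.agency by default, or
# args.feed if given) and extracts domains from the links in each entry.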
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "ryona.agency"
    feed = f"https://{source_domain}/users/fba/feed.atom"

    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
    if args.feed is not None and validators.url(args.feed):
        logger.debug("Setting feed='%s' ...", args.feed)
        feed = str(args.feed)
        source_domain = urlparse(args.feed).netloc

    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    domains = list()

    logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
    response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and len(response.text) > 0:
        logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
        atom = atoma.parse_atom_bytes(response.content)

        logger.debug("atom[]='%s'", type(atom))
        for entry in atom.entries:
            logger.debug("entry[]='%s'", type(entry))
            doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for element in doc.findAll("a"):
                logger.debug("element[]='%s'", type(element))
                for href in element["href"].split(","):
                    logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
                    domain = tidyup.domain(href) if href is not None and href != "" else None

                    logger.debug("domain='%s' - AFTER!", domain)
                    if domain is None or domain == "":
                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                        continue

                    logger.debug("domain='%s' - BEFORE!", domain)
                    domain = domain.encode("idna").decode("utf-8")
                    logger.debug("domain='%s' - AFTER!", domain)

                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue
                    elif domain in domains:
                        logger.debug("domain='%s' is already added - SKIPPED!", domain)
                        continue
                    elif instances.is_registered(domain):
                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                        continue
                    elif instances.is_recent(domain):
                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                        continue

                    logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                    domains.append(domain)

    logger.debug("domains()=%d", len(domains))
    if len(domains) > 0:
        logger.info("Adding %d new instances ...", len(domains))
        for domain in domains:
            logger.debug("domain='%s'", domain)
            try:
                logger.info("Fetching instances from domain='%s' ...", domain)
                federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
            except network.exceptions as exception:
                logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                instances.set_last_error(domain, exception)
                return 100

    logger.debug("Success! - EXIT!")
    return 0

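# Crawls a single instance (args.domain) and then, unless args.single is set,
# walks over already-known instances whose peer lists are due for a re-fetch.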
def fetch_instances(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("args.domain='%s' - checking ...", args.domain)
    if not validators.domain(args.domain):
        logger.warning("args.domain='%s' is not valid.", args.domain)
        return 100
    elif blacklist.is_blacklisted(args.domain):
        logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
        return 101

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Initialize values
    domain = tidyup.domain(args.domain)
    origin = software = None

    # Fetch record
    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
    row = database.cursor.fetchone()
    if row is not None:
        origin = row["origin"]
        software = row["software"]

    if software_helper.is_relay(software):
        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
        return 102

    # Initial fetch
    try:
        logger.info("Fetching instances from domain='%s',origin='%s',software='%s' ...", domain, origin, software)
        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
        instances.set_last_error(args.domain, exception)
        instances.update(args.domain)
        return 100

    if args.single:
        logger.debug("Not fetching more instances - EXIT!")
        return 0

    # Loop through some instances
    database.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
    )

    rows = database.cursor.fetchall()
    logger.info("Checking %d entries ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if row["domain"] == "":
            logger.debug("row[domain] is empty - SKIPPED!")
            continue

        logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
        domain = row["domain"].encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not domain_helper.is_wanted(domain):
            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        try:
            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
            instances.set_last_error(domain, exception)

    logger.debug("Success - EXIT!")
    return 0

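# Walks over all configured CSV blocklists (blocklists.csv_files) and lets
# processing.csv_block() import each one.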
def fetch_csv(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
    for block in blocklists.csv_files:
        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])

        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)

    logger.debug("Success - EXIT!")
    return 0

def fetch_oliphant(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "codeberg.org"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    # Base URL
    base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"

    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
    for block in blocklists.oliphant_blocklists:
        # Is a domain given and not equal to the blocker?
        if isinstance(args.domain, str) and args.domain != block["blocker"]:
            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
            continue

        url = f"{base_url}/{block['csv_url']}"

        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

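# Imports plain-text blocklists (one domain per line); currently only
# seirdy.one's bsl.txt is configured.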
def fetch_txt(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Static URLs
    urls = ({
        "blocker": "seirdy.one",
        "url"    : "https://seirdy.one/pb/bsl.txt",
    },)

    logger.info("Checking %d text file(s) ...", len(urls))
    for row in urls:
        logger.debug("Fetching row[url]='%s' ...", row["url"])
        response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
            domains = response.text.strip().split("\n")

            logger.info("Processing %d domains ...", len(domains))
            for domain in domains:
                logger.debug("domain='%s' - BEFORE!", domain)
                domain = tidyup.domain(domain) if domain is not None and domain != "" else None

                logger.debug("domain='%s' - AFTER!", domain)
                if domain is None or domain == "":
                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                    continue
                elif not domain_helper.is_wanted(domain):
                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                    continue
                elif instances.is_recent(domain):
                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                    continue

                logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)

                logger.debug("processed='%s'", processed)
                if not processed:
                    logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
                    continue

    logger.debug("Success! - EXIT!")
    return 0

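# Scrapes the participant list from fedipact.online and registers any unknown
# instances found there.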
def fetch_fedipact(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "fedipact.online"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
        return 1
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    logger.info("Fetching / from source_domain='%s' ...", source_domain)
    response = utils.fetch_url(
        f"https://{source_domain}",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if response.ok and response.status_code == 200 and response.text != "":
        logger.debug("Parsing %d Bytes ...", len(response.text))

        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))

        rows = doc.findAll("li")
        logger.info("Checking %d row(s) ...", len(rows))
        for row in rows:
            logger.debug("row[]='%s'", type(row))
            domain = tidyup.domain(row.contents[0]) if row.contents[0] is not None and row.contents[0] != "" else None

            logger.debug("domain='%s' - AFTER!", domain)
            if domain is None or domain == "":
                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                continue

            logger.debug("domain='%s' - BEFORE!", domain)
            domain = domain.encode("idna").decode("utf-8")
            logger.debug("domain='%s' - AFTER!", domain)

            if not domain_helper.is_wanted(domain):
                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                continue
            elif instances.is_registered(domain):
                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                continue
            elif instances.is_recent(domain):
                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
                continue

            logger.info("Fetching domain='%s' ...", domain)
            federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0

1164 def fetch_joinmobilizon(args: argparse.Namespace) -> int:
1165     logger.debug("args[]='%s' - CALLED!", type(args))
1166
1167     logger.debug("Invoking locking.acquire() ...")
1168     locking.acquire()
1169
1170     source_domain = "instances.joinmobilizon.org"
1171     if sources.is_recent(source_domain):
1172         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1173         return 1
1174     else:
1175         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1176         sources.update(source_domain)
1177
1178     logger.info("Fetching instances from source_domain='%s' ...", source_domain)
1179     raw = utils.fetch_url(
1180         f"https://{source_domain}/api/v1/instances",
1181         network.web_headers,
1182         (config.get("connection_timeout"), config.get("read_timeout"))
1183     ).text
1184     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1185
1186     parsed = json.loads(raw)
1187     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1188
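    # The Mobilizon instance picker API is assumed to answer with an envelope
    # like (hypothetical sample):
    #
    #     {"data": [{"host": "mobilizon.example", ...}, ...]}
    #
    # which is why only the "data" key and each row's "host" are used below.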
1189     if "data" not in parsed:
1190         logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
1191         return 1
1192
1193     logger.info("Checking %d instances ...", len(parsed["data"]))
1194     for row in parsed["data"]:
1195         logger.debug("row[]='%s'", type(row))
1196         if "host" not in row:
1197             logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
1198             continue
1199         elif not domain_helper.is_wanted(row["host"]):
1200             logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
1201             continue
1202         elif instances.is_registered(row["host"]):
1203             logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
1204             continue
1205
1206         logger.info("Fetching row[host]='%s' ...", row["host"])
1207         federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
1208
1209     logger.debug("Success! - EXIT!")
1210     return 0
1211
1212 def fetch_joinmisskey(args: argparse.Namespace) -> int:
1213     logger.debug("args[]='%s' - CALLED!", type(args))
1214
1215     logger.debug("Invoking locking.acquire() ...")
1216     locking.acquire()
1217
1218     source_domain = "instanceapp.misskey.page"
1219     if sources.is_recent(source_domain):
1220         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1221         return 1
1222     else:
1223         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1224         sources.update(source_domain)
1225
1226     logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
1227     raw = utils.fetch_url(
1228         f"https://{source_domain}/instances.json",
1229         network.web_headers,
1230         (config.get("connection_timeout"), config.get("read_timeout"))
1231     ).text
1232     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
1233
1234     parsed = json.loads(raw)
1235     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
1236
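    # instances.json is assumed to look like (hypothetical sample):
    #
    #     {"instancesInfos": [{"url": "misskey.example", ...}, ...]}
    #
    # only the "instancesInfos" list and each row's "url" are used below.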
1237     if "instancesInfos" not in parsed:
1238         logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
1239         return 1
1240
1241     logger.info("Checking %d instance(s) ...", len(parsed["instancesInfos"]))
1242     for row in parsed["instancesInfos"]:
1243         logger.debug("row[%s]='%s'", type(row), row)
1244         if "url" not in row:
1245             logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
1246             continue
1247         elif not domain_helper.is_wanted(row["url"]):
1248             logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
1249             continue
1250         elif instances.is_registered(row["url"]):
1251             logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
1252             continue
1253
1254         logger.info("Fetching row[url]='%s' ...", row["url"])
1255         federation.fetch_instances(row["url"], "misskey.io", None, inspect.currentframe().f_code.co_name)
1256
1257     logger.debug("Success! - EXIT!")
1258     return 0
1259
1260 def recheck_obfuscation(args: argparse.Namespace) -> int:
1261     logger.debug("args[]='%s' - CALLED!", type(args))
1262
1263     logger.debug("Invoking locking.acquire() ...")
1264     locking.acquire()
1265
1266     if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
1267         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
1268     elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
1269         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
1270     else:
1271         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
1272
1273     rows = database.cursor.fetchall()
1274     logger.info("Checking %d domains ...", len(rows))
1275     for row in rows:
1276         logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
1277         if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
1278             logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
1279             continue
1280
1281         logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"])
1282         blocking = federation.fetch_blocks(row["domain"])
1283
1284         logger.debug("blocking()=%d", len(blocking))
1285         if len(blocking) == 0:
1286             if row["software"] == "pleroma":
1287                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1288                 blocking = pleroma.fetch_blocks(row["domain"])
1289             elif row["software"] == "mastodon":
1290                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1291                 blocking = mastodon.fetch_blocks(row["domain"])
1292             elif row["software"] == "lemmy":
1293                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1294                 blocking = lemmy.fetch_blocks(row["domain"])
1295             elif row["software"] == "friendica":
1296                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1297                 blocking = friendica.fetch_blocks(row["domain"])
1298             elif row["software"] == "misskey":
1299                 logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
1300                 blocking = misskey.fetch_blocks(row["domain"])
1301             else:
1302                 logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
1303
1304         # chaos.social isn't part of oliphant's "hidden" blocklists
1305         logger.debug("row[domain]='%s'", row["domain"])
1306         if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
1307             logger.debug("Invoking instances.set_last_blocked(%s) and instances.set_total_blocks(%s, %d) ...", row["domain"], row["domain"], len(blocking))
1308             instances.set_last_blocked(row["domain"])
1309             instances.set_total_blocks(row["domain"], blocking)
1310
1311         obfuscated = 0
1312         blockdict = list()
1313
1314         logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
1315         for block in blocking:
1316             logger.debug("block[blocked]='%s'", block["blocked"])
1317             blocked = None
1318
1319             if block["blocked"] == "":
1320                 logger.debug("block[blocked] is empty - SKIPPED!")
1321                 continue
1322             elif block["blocked"].endswith(".arpa"):
1323                 logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
1324                 continue
1325             elif block["blocked"].endswith(".tld"):
1326                 logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
1327                 continue
1328             elif block["blocked"].endswith(".onion"):
1329                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
1330                 continue
1331             elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
1332                 logger.debug("block='%s' is obfuscated.", block["blocked"])
1333                 obfuscated = obfuscated + 1
1334                 blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
1335             elif not domain_helper.is_wanted(block["blocked"]):
1336                 logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
1337                 continue
1338             elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
1339                 logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
1340                 continue
1341
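            # utils.deobfuscate() is expected to resolve a censored entry such
            # as "ex*mple.com" (or, when provided, its hash) back to a concrete
            # domain by matching against already-known instances; the exact
            # matching rules are an assumption here, see fba.utils for details.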
1342             logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
1343             if blocked is not None and blocked != block["blocked"]:
1344                 logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
1345                 obfuscated = obfuscated - 1
1346
1347                 if blacklist.is_blacklisted(blocked):
1348                     logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
1349                     continue
1350                 elif blacklist.is_blacklisted(row["domain"]):
1351                     logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
1352                     continue
1353                 elif blocks.is_instance_blocked(row["domain"], blocked):
1354                     logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
1355                     continue
1356
1357                 block["block_level"] = blocks.alias_block_level(block["block_level"])
1358
1359                 logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
1360                 if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
1361                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["reason"], row["domain"])
1362                     blockdict.append({
1363                         "blocked": blocked,
1364                         "reason" : block["reason"],
1365                     })
1366
1367         logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
1368         instances.set_obfuscated_blocks(row["domain"], obfuscated)
1369
1370         logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
1371         if instances.has_pending(row["domain"]):
1372             logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
1373             instances.update(row["domain"])
1374
1375         logger.debug("Invoking commit() ...")
1376         database.connection.commit()
1377
1378         logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
1379         if config.get("bot_enabled") and len(blockdict) > 0:
1380             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", row["domain"], len(blockdict))
1381             network.send_bot_post(row["domain"], blockdict)
1382
1383     logger.debug("Success! - EXIT!")
1384     return 0
1385
1386 def fetch_fedilist(args: argparse.Namespace) -> int:
1387     logger.debug("args[]='%s' - CALLED!", type(args))
1388
1389     logger.debug("Invoking locking.acquire() ...")
1390     locking.acquire()
1391
1392     source_domain = "demo.fedilist.com"
1393     if sources.is_recent(source_domain):
1394         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1395         return 1
1396     else:
1397         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1398         sources.update(source_domain)
1399
1400     url = f"http://{source_domain}/instance/csv?onion=not"
1401     if args.software is not None and args.software != "":
1402         logger.debug("args.software='%s'", args.software)
1403         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
1404
1405     logger.info("Fetching url='%s' ...", url)
1406     response = reqto.get(
1407         url,
1408         headers=network.web_headers,
1409         timeout=(config.get("connection_timeout"), config.get("read_timeout")),
1410         allow_redirects=False
1411     )
1412
1413     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
1414     if not response.ok or response.status_code > 200 or len(response.content) == 0:
1415         logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
1416         return 1
1417
1418     reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
1419
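    # The CSV export is assumed to carry at least a "hostname" column in its
    # header row, e.g. (hypothetical sample):
    #
    #     hostname,software,...
    #     pleroma.example,pleroma,...
    #
    # csv.DictReader keys each row by that header line.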
1420     logger.debug("reader[]='%s'", type(reader))
1421     if reader is None:
1422         logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
1423         return 2
1424
1425     rows = list(reader)
1426
1427     logger.info("Checking %d rows ...", len(rows))
1428     for row in rows:
1429         logger.debug("row[]='%s'", type(row))
1430         if "hostname" not in row:
1431             logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
1432             continue
1433
1434         logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
1435         domain = tidyup.domain(row["hostname"]) if row["hostname"] is not None and row["hostname"] != "" else None
1436         logger.debug("domain='%s' - AFTER!", domain)
1437
1438         if domain is None or domain == "":
1439             logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
1440             continue
1441
1442         logger.debug("domain='%s' - BEFORE!", domain)
1443         domain = domain.encode("idna").decode("utf-8")
1444         logger.debug("domain='%s' - AFTER!", domain)
1445
1446         if not domain_helper.is_wanted(domain):
1447             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1448             continue
1449         elif (args.force is None or not args.force) and instances.is_registered(domain):
1450             logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
1451             continue
1452         elif instances.is_recent(domain):
1453             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1454             continue
1455
1456         logger.info("Fetching instances from domain='%s' ...", domain)
1457         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1458
1459     logger.debug("Success! - EXIT!")
1460     return 0
1461
1462 def update_nodeinfo(args: argparse.Namespace) -> int:
1463     logger.debug("args[]='%s' - CALLED!", type(args))
1464
1465     logger.debug("Invoking locking.acquire() ...")
1466     locking.acquire()
1467
1468     if args.domain is not None and args.domain != "":
1469         logger.debug("Fetching args.domain='%s'", args.domain)
1470         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
1471     elif args.software is not None and args.software != "":
1472         logger.info("Fetching domains for args.software='%s'", args.software)
1473         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
1474     elif args.mode is not None and args.mode != "":
1475         logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
1476         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode.upper()])
1477     elif args.no_software:
1478         logger.info("Fetching domains with no software type detected ...")
1479         database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
1480     elif args.no_auto:
1481         logger.info("Fetching domains with a detection mode other than AUTO_DISCOVERY set ...")
1482         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
1483     else:
1484         logger.info("Fetching all domains, least recently updated first ...")
1485         database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC")
1486
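    # All query variants above order by last_updated ASC, so the stalest
    # records are re-checked first if the command is interrupted and restarted.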
1487     domains = database.cursor.fetchall()
1488
1489     logger.info("Checking %d domain(s) ...", len(domains))
1490     cnt = 0
1491     for row in domains:
1492         logger.debug("row[]='%s'", type(row))
1493         if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
1494             logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
1495             continue
1496
1497         try:
1498             logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
1499             software = federation.determine_software(row["domain"])
1500
1501             logger.debug("Determined software='%s'", software)
1502             if (software != row["software"] and software is not None) or args.force is True:
1503                 logger.debug("software='%s'", software)
1504                 if software is None:
1505                     logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"])
1506                     instances.set_nodeinfo_url(row["domain"], None)
1507
1508                 logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
1509                 instances.set_software(row["domain"], software)
1510
1511             if software is not None:
1512                 logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
1513                 instances.set_success(row["domain"])
1514         except network.exceptions as exception:
1515             logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
1516             instances.set_last_error(row["domain"], exception)
1517
1518         instances.set_last_nodeinfo(row["domain"])
1519         instances.update(row["domain"])
1520         cnt = cnt + 1
1521
1522     logger.debug("Success! - EXIT!")
1523     return 0
1524
1525 def fetch_instances_social(args: argparse.Namespace) -> int:
1526     logger.debug("args[]='%s' - CALLED!", type(args))
1527
1528     logger.debug("Invoking locking.acquire() ...")
1529     locking.acquire()
1530
1531     source_domain = "instances.social"
1532
1533     if config.get("instances_social_api_key") == "":
1534         logger.error("API key not set. Please set it in your config.json file.")
1535         return 1
1536     elif sources.is_recent(source_domain):
1537         logger.info("API from source_domain='%s' has recently been accessed - EXIT!", source_domain)
1538         return 2
1539     else:
1540         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
1541         sources.update(source_domain)
1542
1543     headers = {
1544         "Authorization": f"Bearer {config.get('instances_social_api_key')}",
1545     }
1546
1547     logger.info("Fetching list from source_domain='%s' ...", source_domain)
1548     fetched = network.get_json_api(
1549         source_domain,
1550         "/api/1.0/instances/list?count=0&sort_by=name",
1551         headers,
1552         (config.get("connection_timeout"), config.get("read_timeout"))
1553     )
1554     logger.debug("fetched[]='%s'", type(fetched))
1555
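    # network.get_json_api() is expected to return an envelope carrying either
    # "json" (the decoded payload) or "error_message"/"exception" on failure;
    # the checks below handle each case in that order.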
1556     if "error_message" in fetched:
1557         logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
1558         return 2
1559     elif "exception" in fetched:
1560         logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
1561         return 3
1562     elif "json" not in fetched:
1563         logger.warning("fetched has no element 'json' - EXIT!")
1564         return 4
1565     elif "instances" not in fetched["json"]:
1566         logger.warning("fetched[json] has no element 'instances' - EXIT!")
1567         return 5
1568
1569     domains = list()
1570     rows = fetched["json"]["instances"]
1571
1572     logger.info("Checking %d row(s) ...", len(rows))
1573     for row in rows:
1574         logger.debug("row[]='%s'", type(row))
1575         domain = tidyup.domain(row["name"]) if row["name"] is not None and row["name"] != "" else None
1576         logger.debug("domain='%s' - AFTER!", domain)
1577
1578         if domain is None or domain == "":
1579             logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
1580             continue
1581
1582         logger.debug("domain='%s' - BEFORE!", domain)
1583         domain = domain.encode("idna").decode("utf-8")
1584         logger.debug("domain='%s' - AFTER!", domain)
1585
1586         if not domain_helper.is_wanted(domain):
1587             logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1588             continue
1589         elif domain in domains:
1590             logger.debug("domain='%s' is already added - SKIPPED!", domain)
1591             continue
1592         elif instances.is_registered(domain):
1593             logger.debug("domain='%s' is already registered - SKIPPED!", domain)
1594             continue
1595         elif instances.is_recent(domain):
1596             logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
1597             continue
1598
         domains.append(domain)
1599         logger.info("Fetching instances from domain='%s' ...", domain)
1600         federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
1601
1602     logger.debug("Success! - EXIT!")
1603     return 0
1604
1605 def fetch_relays(args: argparse.Namespace) -> int:
1606     logger.debug("args[]='%s' - CALLED!", type(args))
1607
1608     logger.debug("Invoking locking.acquire() ...")
1609     locking.acquire()
1610
1611     if args.domain is not None and args.domain != "":
1612         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
1613     elif args.software is not None and args.software != "":
1614         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
1615     else:
1616         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
1617
1618     domains = list()
1619     rows = database.cursor.fetchall()
1620
1621     logger.info("Checking %d relays ...", len(rows))
1622     for row in rows:
1623         logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1624         peers = list()
1625         if not args.force and instances.is_recent(row["domain"]):
1626             logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
1627             continue
1628
1629         try:
1630             if row["software"] == "pub-relay":
1631                 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
1632                 raw = network.fetch_api_url(
1633                     row["nodeinfo_url"],
1634                     (config.get("connection_timeout"), config.get("read_timeout"))
1635                 )
1636
1637                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1638                 if "exception" in raw:
1639                     logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
1640                     raise raw["exception"]
1641                 elif "error_message" in raw:
1642                     logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
1643                     instances.set_last_error(row["domain"], raw)
1644                     instances.set_last_instance_fetch(row["domain"])
1645                     instances.update(row["domain"])
1646                     continue
1647                 elif "json" not in raw:
1648                     logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
1649                     continue
1650                 elif "metadata" not in raw["json"]:
1651                     logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
1652                     continue
1653                 elif "peers" not in raw["json"]["metadata"]:
1654                     logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
1655                     continue
1656             else:
1657                 logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
1658                 raw = utils.fetch_url(
1659                     f"https://{row['domain']}",
1660                     network.web_headers,
1661                     (config.get("connection_timeout"), config.get("read_timeout"))
1662                 ).text
1663                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
1664
1665                 doc = bs4.BeautifulSoup(raw, features="html.parser")
1666                 logger.debug("doc[]='%s'", type(doc))
1667
1668         except network.exceptions as exception:
1669             logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
1670             instances.set_last_error(row["domain"], exception)
1671             instances.set_last_instance_fetch(row["domain"])
1672             instances.update(row["domain"])
1673             continue
1674
1675         logger.debug("row[software]='%s'", row["software"])
1676         if row["software"] == "activityrelay":
1677             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1678             tags = doc.findAll("p")
1679
1680             logger.debug("Checking %d paragraphs ...", len(tags))
1681             for tag in tags:
1682                 logger.debug("tag[]='%s'", type(tag))
1683                 if len(tag.contents) == 0:
1684                     logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
1685                     continue
1686                 elif "registered instances" not in tag.contents[0]:
1687                     logger.debug("Skipping paragraph, text not found.")
1688                     continue
1689
1690                 logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
1691                 for domain in tag.contents:
1692                     logger.debug("domain[%s]='%s'", type(domain), domain)
1693                     if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
1694                         continue
1695
1696                     domain = str(domain)
1697                     logger.debug("domain='%s'", domain)
1698                     if not domain_helper.is_wanted(domain):
1699                         logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
1700                         continue
1701
1702                     logger.debug("domain='%s' - BEFORE!", domain)
1703                     domain = tidyup.domain(domain) if domain is not None and domain != "" else None
1704                     logger.debug("domain='%s' - AFTER!", domain)
1705
1706                     if domain is None or domain == "":
1707                         logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1708                         continue
1709                     elif domain not in peers:
1710                         logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1711                         peers.append(domain)
1712
1713                     if dict_helper.has_key(domains, "domain", domain):
1714                         logger.debug("domain='%s' already added", domain)
1715                         continue
1716
1717                     logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
1718                     domains.append({
1719                         "domain": domain,
1720                         "origin": row["domain"],
1721                     })
1722         elif row["software"] in ["aoderelay", "selective-relay"]:
1723             logger.debug("Checking row[domain]='%s' ...", row["domain"])
1724             if row["software"] == "aoderelay":
1725                 tags = doc.findAll("section", {"class": "instance"})
1726             else:
1727                 tags = doc.find("div", {"id": "instances"}).findAll("li")
1728
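            # aoderelay is assumed to render one <section class="instance">
            # per peer, selective-relay a <div id="instances"> list; either
            # way each entry is expected to carry a link like (hypothetical
            # markup, "peer.example" is a placeholder):
            #
            #     <a href="https://peer.example/">peer.example</a>
            #
            # from whose href the host part is extracted below.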
1729             logger.debug("Checking %d tags ...", len(tags))
1730             for tag in tags:
1731                 logger.debug("tag[]='%s'", type(tag))
1732
1733                 link = tag.find("a")
1734                 logger.debug("link[%s]='%s'", type(link), link)
1735                 if not isinstance(link, bs4.element.Tag):
1736                     logger.warning("link[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(link), link)
1737                     continue
1738
1739                 components = urlparse(link.get("href"))
1740                 logger.debug("components(%d)='%s'", len(components), components)
1741                 domain = components.netloc.lower().split(":")[0]
1742
1743                 logger.debug("domain='%s' - BEFORE!", domain)
1744                 domain = tidyup.domain(domain) if domain is not None and domain != "" else None
1745                 logger.debug("domain='%s' - AFTER!", domain)
1746
1747                 if domain is None or domain == "":
1748                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1749                     continue
1750                 elif domain not in peers:
1751                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1752                     peers.append(domain)
1753
1754                 if dict_helper.has_key(domains, "domain", domain):
1755                     logger.debug("domain='%s' already added", domain)
1756                     continue
1757
1758                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1759                 domains.append({
1760                     "domain": domain,
1761                     "origin": row["domain"],
1762                 })
1763         elif row["software"] == "pub-relay":
1764             logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
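            # The pub-relay nodeinfo fetched above is assumed to list plain
            # hostnames, e.g. (hypothetical sample):
            #
            #     {"metadata": {"peers": ["relay-peer.example", ...]}}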
1765             for domain in raw["json"]["metadata"]["peers"]:
1766                 logger.debug("domain='%s' - BEFORE!", domain)
1767                 domain = tidyup.domain(domain) if domain is not None and domain != "" else None
1768                 logger.debug("domain='%s' - AFTER!", domain)
1769
1770                 if domain is None or domain == "":
1771                     logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
1772                     continue
1773                 elif domain not in peers:
1774                     logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
1775                     peers.append(domain)
1776
1777                 if dict_helper.has_key(domains, "domain", domain):
1778                     logger.debug("domain='%s' already added", domain)
1779                     continue
1780
1781                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
1782                 domains.append({
1783                     "domain": domain,
1784                     "origin": row["domain"],
1785                 })
1786         else:
1787             logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
1788             continue
1789
1790         logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
1791         instances.set_last_instance_fetch(row["domain"])
1792
1793         logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
1794         instances.set_total_peers(row["domain"], peers)
1795
1796         logger.debug("Flushing data for row[domain]='%s'", row["domain"])
1797         instances.update(row["domain"])
1798
1799     logger.info("Checking %d domains ...", len(domains))
1800     for row in domains:
1801         logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
1802         if not domain_helper.is_wanted(row["domain"]):
1803             logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
1804             continue
1805         elif instances.is_registered(row["domain"]):
1806             logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
1807             continue
1808
1809         logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
1810         federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
1811
1812     logger.debug("Success! - EXIT!")
1813     return 0
1814
1815 def convert_idna(args: argparse.Namespace) -> int:
1816     logger.debug("args[]='%s' - CALLED!", type(args))
1817
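    # instances.translate_idnas()/blocks.translate_idnas() are expected to
    # rewrite Unicode domains to their punycode form (e.g. "münchen.example"
    # becomes "xn--mnchen-3ya.example"); the NOT LIKE '%xn--%' filters skip
    # rows that are already encoded.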
1818     database.cursor.execute("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC")
1819     rows = database.cursor.fetchall()
1820
1821     logger.debug("rows[]='%s'", type(rows))
1822     instances.translate_idnas(rows, "domain")
1823
1824     database.cursor.execute("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC")
1825     rows = database.cursor.fetchall()
1826
1827     logger.debug("rows[]='%s'", type(rows))
1828     instances.translate_idnas(rows, "origin")
1829
1830     database.cursor.execute("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC")
1831     rows = database.cursor.fetchall()
1832
1833     logger.debug("rows[]='%s'", type(rows))
1834     blocks.translate_idnas(rows, "blocker")
1835
1836     database.cursor.execute("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC")
1837     rows = database.cursor.fetchall()
1838
1839     logger.debug("rows[]='%s'", type(rows))
1840     blocks.translate_idnas(rows, "blocked")
1841
1842     logger.debug("Success! - EXIT!")
1843     return 0
1844
1845 def remove_invalid(args: argparse.Namespace) -> int:
1846     logger.debug("args[]='%s' - CALLED!", type(args))
1847
1848     logger.debug("Invoking locking.acquire() ...")
1849     locking.acquire()
1850
1851     database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
1852     rows = database.cursor.fetchall()
1853
1854     logger.info("Checking %d domains ...", len(rows))
1855     for row in rows:
1856         logger.debug("row[domain]='%s'", row["domain"])
1857         if not validators.domain(row["domain"].split("/")[0]):
1858             logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
1859             database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
1860             database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
1861
1862     logger.debug("Invoking commit() ...")
1863     database.connection.commit()
1864
1865     logger.info("Vacuum cleaning database ...")
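    # VACUUM rebuilds the SQLite database file, reclaiming the space freed by
    # the DELETE statements above.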
1866     database.cursor.execute("VACUUM")
1867
1868     logger.debug("Success! - EXIT!")
1869     return 0