def _register_relay_peer(domain, row, peers: list, domains: list) -> None:
    """Tidy up *domain* and record it for relay *row*, mutating *peers*/*domains* in place.

    Shared bookkeeping used by every relay-software branch in fetch_relays():
    runs tidyup.domain(), skips empty results, appends to the relay's peer
    list and to the global 'domains' work queue (deduplicated via
    dict_helper.has_key()).
    """
    logger.debug("domain='%s' - BEFORE!", domain)
    domain = tidyup.domain(domain) if domain is not None and domain != "" else None
    logger.debug("domain='%s' - AFTER!", domain)

    if domain is None or domain == "":
        logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
        return
    elif domain not in peers:
        logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
        peers.append(domain)

    if dict_helper.has_key(domains, "domain", domain):
        logger.debug("domain='%s' already added", domain)
        return

    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
    domains.append({
        "domain": domain,
        "origin": row["domain"],
    })

def fetch_relays(args: argparse.Namespace) -> int:
    """Fetch peer lists from all known relay instances.

    Selects relay rows (activityrelay, aoderelay, selective-relay, pub-relay)
    from the 'instances' table — optionally narrowed by args.domain or
    args.software — then obtains each relay's registered peers: pub-relay via
    its nodeinfo API, the others by scraping the relay's HTML front page.
    Newly seen, wanted domains are handed to federation.fetch_instances().

    Parameters:
        args: Parsed command-line arguments; reads args.domain, args.software
              and args.force.

    Returns:
        0 on completion (process exit code).
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Narrow the relay selection when --domain or --software was supplied.
    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
    elif args.software is not None and args.software != "":
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")

    domains = []
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
        peers = []
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            continue

        try:
            if row["software"] == "pub-relay":
                # pub-relay publishes its peers via nodeinfo metadata - no scraping needed.
                logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
                raw = network.fetch_api_url(
                    row["nodeinfo_url"],
                    (config.get("connection_timeout"), config.get("read_timeout"))
                )

                logger.debug("raw[%s]()=%d", type(raw), len(raw))
                if "exception" in raw:
                    logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
                    raise raw["exception"]
                elif "error_message" in raw:
                    logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
                    instances.set_last_error(row["domain"], raw)
                    instances.set_last_instance_fetch(row["domain"])
                    instances.update(row["domain"])
                    continue
                elif "json" not in raw:
                    logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
                    continue
                elif "metadata" not in raw["json"]:
                    # BUGFIX: message previously claimed key 'json' was missing.
                    logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
                    continue
                elif "peers" not in raw["json"]["metadata"]:
                    # BUGFIX: message previously claimed key 'json' was missing and had a broken bracket.
                    logger.warning("raw[json][metadata]()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
                    continue
            else:
                # All other relay software is scraped from the relay's HTML front page.
                logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
                raw = utils.fetch_url(
                    f"https://{row['domain']}",
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text
                logger.debug("raw[%s]()=%d", type(raw), len(raw))

                doc = bs4.BeautifulSoup(raw, features="html.parser")
                logger.debug("doc[]='%s'", type(doc))

        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update(row["domain"])
            continue

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.findAll("p")

            logger.debug("Checking %d paragraphs ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                    continue
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")
                    continue

                # The peer domains are the NavigableString children of this paragraph.
                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        continue

                    domain = str(domain)
                    logger.debug("domain='%s'", domain)
                    if not domain_helper.is_wanted(domain):
                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                        continue

                    _register_relay_peer(domain, row, peers, domains)
        elif row["software"] in ["aoderelay", "selective-relay"]:
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            if row["software"] == "aoderelay":
                tags = doc.findAll("section", {"class": "instance"})
            else:
                tags = doc.find("div", {"id": "instances"}).findAll("li")

            logger.debug("Checking %d tags ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))

                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                if not isinstance(link, bs4.element.Tag):
                    logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
                    continue

                # The peer domain is the host part of the anchor's href.
                components = urlparse(link.get("href"))
                logger.debug("components(%d)='%s'", len(components), components)
                domain = components.netloc.lower().split(":")[0]

                _register_relay_peer(domain, row, peers, domains)
        elif row["software"] == "pub-relay":
            logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
            for domain in raw["json"]["metadata"]["peers"]:
                _register_relay_peer(domain, row, peers, domains)
        else:
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
            continue

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])

        logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
        instances.set_total_peers(row["domain"], peers)

        logger.debug("Flushing data for row[domain]='%s'", row["domain"])
        instances.update(row["domain"])

    logger.info("Checking %d domains ...", len(domains))
    for row in domains:
        logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
        if not domain_helper.is_wanted(row["domain"]):
            logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
            continue
        elif instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
            continue

        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
+