git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Wed, 6 Sep 2023 01:37:48 +0000 (03:37 +0200)
committer Roland Häder <roland@mxchange.org>
Wed, 6 Sep 2023 01:37:48 +0000 (03:37 +0200)
- fetch_relays now supports --software=foo parameter
- added support for 'pub-relay' relays, which provide their peers via their
  nodeinfo URL (see element metadata -> peers)

fba/boot.py
fba/commands.py
fba/helpers/blacklist.py

index d9fa4af93b65e171f943c00621d521db2f06dced..8e0f3fe5c92801e83279965383755bb6dc78f911 100644 (file)
@@ -231,6 +231,7 @@ def init_parser():
     )
     parser.set_defaults(command=commands.fetch_relays)
     parser.add_argument("--domain", help="Instance name (aka. 'relay')")
+    parser.add_argument("--software", help="Name of software, e.g. 'lemmy'")
     parser.add_argument("--force", action="store_true", help="Forces update of data, no matter what.")
 
     ### Remove invalid domains ###
index 7611f5560c13ee7e3fab0d1e597fd7525b2c37ab..99a2870e5e2ad1c16674d330e27ed1202ee0bbbf 100644 (file)
@@ -1867,9 +1867,11 @@ def fetch_relays(args: argparse.Namespace) -> int:
     locking.acquire()
 
     if args.domain is not None and args.domain != "":
-        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
+        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
+    elif args.software is not None and args.software != "":
+        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
     else:
-        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
+        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
 
     domains = list()
     rows = database.cursor.fetchall()
@@ -1883,13 +1885,44 @@ def fetch_relays(args: argparse.Namespace) -> int:
             continue
 
         try:
-            logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
-            raw = utils.fetch_url(
-                f"https://{row['domain']}",
-                network.web_headers,
-                (config.get("connection_timeout"), config.get("read_timeout"))
-            ).text
-            logger.debug("raw[%s]()=%d", type(raw), len(raw))
+            if row["software"] == "pub-relay":
+                logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
+                raw = network.fetch_api_url(
+                    row["nodeinfo_url"],
+                    (config.get("connection_timeout"), config.get("read_timeout"))
+                )
+
+                logger.debug("raw[%s]()=%d", type(raw), len(raw))
+                if "exception" in raw:
+                    logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
+                    raise raw["exception"]
+                elif "error_message" in raw:
+                    logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
+                    instances.set_last_error(row["domain"], raw)
+                    instances.set_last_instance_fetch(row["domain"])
+                    instances.update(row["domain"])
+                    continue
+                elif not "json" in raw:
+                    logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
+                    continue
+                elif not "metadata" in raw["json"]:
+                    logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
+                    continue
+                elif not "peers" in raw["json"]["metadata"]:
+                    logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
+                    continue
+            else:
+                logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
+                raw = utils.fetch_url(
+                    f"https://{row['domain']}",
+                    network.web_headers,
+                    (config.get("connection_timeout"), config.get("read_timeout"))
+                ).text
+                logger.debug("raw[%s]()=%d", type(raw), len(raw))
+
+                doc = bs4.BeautifulSoup(raw, features="html.parser")
+                logger.debug("doc[]='%s'", type(doc))
+
         except network.exceptions as exception:
             logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
             instances.set_last_error(row["domain"], exception)
@@ -1897,9 +1930,6 @@ def fetch_relays(args: argparse.Namespace) -> int:
             instances.update(row["domain"])
             continue
 
-        doc = bs4.BeautifulSoup(raw, features="html.parser")
-        logger.debug("doc[]='%s'", type(doc))
-
         logger.debug("row[software]='%s'", row["software"])
         if row["software"] == "activityrelay":
             logger.debug("Checking row[domain]='%s' ...", row["domain"])
@@ -1967,10 +1997,6 @@ def fetch_relays(args: argparse.Namespace) -> int:
                 components = urlparse(link["href"])
                 domain = components.netloc.lower()
 
-                if not domain_helper.is_wanted(domain):
-                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
-                    continue
-
                 logger.debug("domain='%s' - BEFORE!", domain)
                 domain = tidyup.domain(domain)
                 logger.debug("domain='%s' - AFTER!", domain)
@@ -1985,6 +2011,29 @@ def fetch_relays(args: argparse.Namespace) -> int:
                 if dict_helper.has_key(domains, "domain", domain):
                     logger.debug("domain='%s' already added", domain)
                     continue
+                elif not domain_helper.is_wanted(domain):
+                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+                    continue
+
+                logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
+                domains.append({
+                    "domain": domain,
+                    "origin": row["domain"],
+                })
+        elif row["software"] == "pub-relay":
+            logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"])
+            for domain in raw["json"]["metadata"]["peers"]:
+                logger.debug("domain='%s'", domain)
+                if domain not in peers:
+                    logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
+                    peers.append(domain)
+
+                if dict_helper.has_key(domains, "domain", domain):
+                    logger.debug("domain='%s' already added", domain)
+                    continue
+                elif not domain_helper.is_wanted(domain):
+                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+                    continue
 
                 logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
                 domains.append({
@@ -1993,6 +2042,7 @@ def fetch_relays(args: argparse.Namespace) -> int:
                 })
         else:
             logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
+            continue
 
         logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
         instances.set_last_instance_fetch(row["domain"])
index bbd8d03f27defb777bbcd9ff15af4b7b33b45489..91c674b63f38644c4491a4ea3faf395f34fe8331 100644 (file)
@@ -47,6 +47,7 @@ _blacklist = {
     "misskeytest.chn.moe" : "Testing/developing instances shouldn't be part of public instances",
     "netlify.app"         : "Testing/developing instances shouldn't be part of public instances",
     "ignorelist.com"      : "Testing/developing instances shouldn't be part of public instances",
+    "app.github.dev"      : "Testing/developing instances shouldn't be part of public instances",
     "hexbear.net"         : "Is a Lemmy instance with malicious JavaScript code (shell commands)",
 }