From: Roland Häder
Date: Sat, 1 Jul 2023 06:23:02 +0000 (+0200)
Subject: Continued:
X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=e9d1c115a0cebf1b0d8751c69094145f80b5cf88;p=fba.git

Continued:
- added command check_nodeinfo which checks whether the domain is part of nodeinfo_url
- loop through nodeinfo IDs (newest first) so the newest nodeinfo is found first
---

diff --git a/daemon.py b/daemon.py
index 60a1d2d..acc6cdf 100755
--- a/daemon.py
+++ b/daemon.py
@@ -122,7 +122,7 @@ def api_index(request: Request, mode: str, value: str, amount: int):
             raise HTTPException(status_code=500, detail=f"domain='{domain}' is not wanted")
 
         wildchar = "*." + ".".join(domain.split(".")[-domain.count("."):])
-        punycode = domain.encode('idna').decode('utf-8')
+        punycode = domain.encode("idna").decode("utf-8")
     elif mode == "reason":
         reason = re.sub("(%|_)", "", tidyup.reason(value))
         if len(reason) < 3:
diff --git a/fba/boot.py b/fba/boot.py
index d72c8e4..dc53f92 100644
--- a/fba/boot.py
+++ b/fba/boot.py
@@ -165,6 +165,13 @@ def init_parser():
     )
     parser.set_defaults(command=commands.fetch_pixelfed_api)
 
+    ### Check nodeinfo ###
+    parser = subparser_command.add_parser(
+        "check_nodeinfo",
+        help="Checks if domain is part of nodeinfo.",
+    )
+    parser.set_defaults(command=commands.check_nodeinfo)
+
     logger.debug("EXIT!")
 
 def run_command():
diff --git a/fba/commands.py b/fba/commands.py
index bd755b7..af24675 100644
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -72,6 +72,29 @@ def check_instance(args: argparse.Namespace) -> int:
     logger.debug("status=%d - EXIT!", status)
     return status
 
+def check_nodeinfo(args: argparse.Namespace) -> int:
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    # Fetch rows
+    database.cursor.execute("SELECT domain, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
+
+    cnt = 0
+    for row in database.cursor.fetchall():
+        logger.debug("Checking row[domain]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["nodeinfo_url"])
+        punycode = row["domain"].encode("idna").decode("utf-8")
+
+        if row["nodeinfo_url"].startswith("/"):
+            logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
+            continue
+        elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
+            logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s'", punycode, row["nodeinfo_url"])
+            cnt = cnt + 1
+
+    logger.info("Found %d row(s)", cnt)
+
+    logger.debug("EXIT!")
+    return 0
+
 def fetch_pixelfed_api(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
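For illustration, the check performed by the new check_nodeinfo command boils down to the following standalone sketch. The function name nodeinfo_url_matches, the sample rows and the domains example.org and bücher.example are made up for this sketch; the real command reads domain/nodeinfo_url pairs from the instances table instead of taking them as parameters.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def nodeinfo_url_matches(domain: str, nodeinfo_url: str) -> bool:
    # Relative URLs (e.g. "/nodeinfo/2.0") are resolved against the
    # instance itself, so they always match.
    if nodeinfo_url.startswith("/"):
        return True

    # IDN domains are stored decoded, but the URL may carry the
    # punycoded form, so both spellings have to be checked.
    punycode = domain.encode("idna").decode("utf-8")

    return punycode in nodeinfo_url or domain in nodeinfo_url

# Hypothetical rows as the SELECT in check_nodeinfo would return them:
rows = [
    ("example.org", "/nodeinfo/2.0"),
    ("bücher.example", "https://xn--bcher-kva.example/nodeinfo/2.1"),
    ("example.org", "https://somewhere-else.example/nodeinfo/2.0"),
]

for domain, nodeinfo_url in rows:
    if not nodeinfo_url_matches(domain, nodeinfo_url):
        # Only the third sample row triggers this warning.
        logger.warning("domain='%s' is not found in nodeinfo_url='%s'", domain, nodeinfo_url)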
diff --git a/fba/http/federation.py b/fba/http/federation.py
index b1a87fa..b60b051 100644
--- a/fba/http/federation.py
+++ b/fba/http/federation.py
@@ -41,18 +41,6 @@ from fba.networks import peertube
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-# "rel" identifiers (no real URLs)
-nodeinfo_identifier = [
-    "https://nodeinfo.diaspora.software/ns/schema/2.1",
-    "https://nodeinfo.diaspora.software/ns/schema/2.0",
-    "https://nodeinfo.diaspora.software/ns/schema/1.1",
-    "https://nodeinfo.diaspora.software/ns/schema/1.0",
-    "http://nodeinfo.diaspora.software/ns/schema/2.1",
-    "http://nodeinfo.diaspora.software/ns/schema/2.0",
-    "http://nodeinfo.diaspora.software/ns/schema/1.1",
-    "http://nodeinfo.diaspora.software/ns/schema/1.0",
-]
-
 def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
     logger.debug("domain='%s',origin='%s',software='%s',command='%s',path='%s' - CALLED!", domain, origin, software, command, path)
     domain_helper.raise_on(domain)
@@ -286,6 +274,18 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
     logger.debug("domain='%s' - CALLED!", domain)
     domain_helper.raise_on(domain)
 
+    # "rel" identifiers (no real URLs)
+    nodeinfo_identifier = [
+        "https://nodeinfo.diaspora.software/ns/schema/2.1",
+        "http://nodeinfo.diaspora.software/ns/schema/2.1",
+        "https://nodeinfo.diaspora.software/ns/schema/2.0",
+        "http://nodeinfo.diaspora.software/ns/schema/2.0",
+        "https://nodeinfo.diaspora.software/ns/schema/1.1",
+        "http://nodeinfo.diaspora.software/ns/schema/1.1",
+        "https://nodeinfo.diaspora.software/ns/schema/1.0",
+        "http://nodeinfo.diaspora.software/ns/schema/1.0",
+    ]
+
     # No CSRF by default, you don't have to add network.api_headers by yourself here
     headers = tuple()
 
@@ -309,6 +309,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
         (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
     )
 
+    logger.debug("data[]='%s'", type(data))
     if "error_message" not in data:
         nodeinfo = data["json"]
 
@@ -317,46 +318,54 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
         logger.debug("Found entries: nodeinfo()=%d,domain='%s'", len(nodeinfo), domain)
         if "links" in nodeinfo:
-            logger.debug("Found nodeinfo[links]()=%d record(s)", len(nodeinfo["links"]))
-            for link in nodeinfo["links"]:
-                logger.debug("link[%s]='%s'", type(link), link)
-                if not isinstance(link, dict) or not "rel" in link:
-                    logger.warning("link[]='%s' is not 'dict' or no element 'rel' found", type(link))
-                elif link["rel"] in nodeinfo_identifier:
-                    # Default is that 'href' has a complete URL, but some hosts don't send that
-                    url = link["href"]
-                    components = urlparse(link["href"])
-
-                    logger.debug("components[%s]='%s'", type(components), components)
-                    if components.scheme == "" and components.netloc == "":
-                        logger.debug("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
-                        url = f"https://{domain}{url}"
-                        components = urlparse(url)
-
-                    if not utils.is_domain_wanted(components.netloc):
-                        logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
-                        continue
-
-                    logger.debug("Fetching nodeinfo from url='%s' ...", url)
-                    data = network.fetch_api_url(
-                        url,
-                        (config.get("connection_timeout"), config.get("read_timeout"))
-                    )
-
-                    logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))
-                    if "error_message" not in data and "json" in data:
-                        logger.debug("Found JSON data()=%d", len(data))
-                        instances.set_detection_mode(domain, "AUTO_DISCOVERY")
-                        instances.set_nodeinfo_url(domain, link["href"])
-
-                        logger.debug("Marking domain='%s' as successfully handled ...", domain)
-                        instances.set_success(domain)
-                        break
-                    else:
-                        logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data))
-                        instances.set_last_error(domain, data)
-                else:
-                    logger.warning("Unknown 'rel' value: domain='%s',link[rel]='%s'", domain, link["rel"])
+            logger.debug("Found nodeinfo[links]()=%d record(s)", len(nodeinfo["links"]))
+            for niid in nodeinfo_identifier:
+                data = dict()
+
+                logger.debug("Checking niid='%s' ...", niid)
+                for link in nodeinfo["links"]:
+                    logger.debug("link[%s]='%s'", type(link), link)
+                    if not isinstance(link, dict) or not "rel" in link:
+                        logger.warning("link[]='%s' is not 'dict' or no element 'rel' found", type(link))
+                    elif link["rel"] == niid:
+                        # Default is that 'href' has a complete URL, but some hosts don't send that
+                        logger.debug("link[href]='%s' matches niid='%s'", link["href"], niid)
+                        url = link["href"]
+                        components = urlparse(link["href"])
+
+                        logger.debug("components[%s]='%s'", type(components), components)
+                        if components.scheme == "" and components.netloc == "":
+                            logger.debug("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
+                            url = f"https://{domain}{url}"
+                            components = urlparse(url)
+
+                        if not utils.is_domain_wanted(components.netloc):
+                            logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
+                            continue
+
+                        logger.debug("Fetching nodeinfo from url='%s' ...", url)
+                        data = network.fetch_api_url(
+                            url,
+                            (config.get("connection_timeout"), config.get("read_timeout"))
+                        )
+
+                        logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))
+                        if "error_message" not in data and "json" in data:
+                            logger.debug("Found JSON data()=%d,link[href]='%s'", len(data), link["href"])
+                            instances.set_detection_mode(domain, "AUTO_DISCOVERY")
+                            instances.set_nodeinfo_url(domain, link["href"])
+
+                            logger.debug("Marking domain='%s' as successfully handled ...", domain)
+                            instances.set_success(domain)
+                            break
+                        else:
+                            logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data))
+                            instances.set_last_error(domain, data)
+
+                logger.debug("data()=%d", len(data))
+                if "error_message" not in data and "json" in data:
+                    logger.debug("Auto-discovery successful: domain='%s'", domain)
+                    break
         else:
             logger.warning("nodeinfo does not contain 'links': domain='%s'", domain)
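The effect of the reordered loops can be seen in this minimal sketch: iterating the schema identifiers in the outer loop means the first match is always the newest schema the instance advertises, regardless of the order of its "links" array. The helper newest_nodeinfo_link and the sample well-known document with example.org URLs are made up for illustration; URL completion, the is_domain_wanted filter and the instances bookkeeping of the real code are left out.

# Same identifier order as in fetch_wellknown_nodeinfo(): newest schema first.
NODEINFO_IDENTIFIER = [
    "https://nodeinfo.diaspora.software/ns/schema/2.1",
    "http://nodeinfo.diaspora.software/ns/schema/2.1",
    "https://nodeinfo.diaspora.software/ns/schema/2.0",
    "http://nodeinfo.diaspora.software/ns/schema/2.0",
    "https://nodeinfo.diaspora.software/ns/schema/1.1",
    "http://nodeinfo.diaspora.software/ns/schema/1.1",
    "https://nodeinfo.diaspora.software/ns/schema/1.0",
    "http://nodeinfo.diaspora.software/ns/schema/1.0",
]

def newest_nodeinfo_link(links: list) -> dict:
    # Outer loop over identifiers, inner loop over links: the first hit
    # is therefore the newest schema found, not the first listed link.
    for niid in NODEINFO_IDENTIFIER:
        for link in links:
            if isinstance(link, dict) and link.get("rel") == niid:
                return link
    return None

# Hypothetical /.well-known/nodeinfo "links" array listing the older schema first:
links = [
    {"rel": "http://nodeinfo.diaspora.software/ns/schema/1.0", "href": "https://example.org/nodeinfo/1.0"},
    {"rel": "http://nodeinfo.diaspora.software/ns/schema/2.0", "href": "https://example.org/nodeinfo/2.0"},
]

print(newest_nodeinfo_link(links)["href"])  # https://example.org/nodeinfo/2.0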