From bc55222cf812673e4c38414fe3ea266104840950 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 11 Sep 2023 20:59:11 +0200 Subject: [PATCH] Continued: - some url.netloc may contain port number which needs to be split off --- fba/commands.py | 12 +++++++----- fba/http/federation.py | 7 ++++--- fba/http/nodeinfo.py | 7 ++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index 52401ff..9089cd3 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -770,13 +770,15 @@ def fetch_fba_rss(args: argparse.Namespace) -> int: locking.acquire() components = urlparse(args.feed) + domain = components.netloc.lower().split(":")[0] - if sources.is_recent(components.netloc): - logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc) + logger.debug("domain='%s'", domain) + if sources.is_recent(domain): + logger.info("API from domain='%s' has recently being accessed - EXIT!", domain) return 0 else: - logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc) - sources.update(components.netloc) + logger.debug("domain='%s' has not been recently used, marking ...", domain) + sources.update(domain) logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed) response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) @@ -2004,7 +2006,7 @@ def fetch_relays(args: argparse.Namespace) -> int: continue components = urlparse(link["href"]) - domain = components.netloc.lower() + domain = components.netloc.lower().split(":")[0] logger.debug("domain='%s' - BEFORE!", domain) domain = tidyup.domain(domain) if domain != None and domain != "" else None diff --git a/fba/http/federation.py b/fba/http/federation.py index 49f5c52..eeed806 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -317,11 +317,12 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url) components = urlparse(response.url) + domain2 = components.netloc.lower().split(":")[0] - logger.debug("components[]='%s'", type(components)) - if not instances.is_registered(components.netloc): + logger.debug("domain2='%s'", domain2) + if not instances.is_registered(domain2): logger.info("components.netloc='%s' is not registered, adding ...", components.netloc) - fetch_instances(components.netloc, domain, None, "fetch_generator") + fetch_instances(domain2, domain, None, "fetch_generator") message = f"Redirect from domain='{domain}' to response.url='{response.url}'" instances.set_last_error(domain, message) diff --git a/fba/http/nodeinfo.py b/fba/http/nodeinfo.py index 274575e..e18daff 100644 --- a/fba/http/nodeinfo.py +++ b/fba/http/nodeinfo.py @@ -210,9 +210,10 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: url = f"{components.scheme}://{domain}{components.path}" components = urlparse(url) - logger.debug("components.netloc[]='%s'", type(components.netloc)) - if not domain_helper.is_wanted(components.netloc): - logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc) + domain2 = components.netloc.lower().split(":")[0] + logger.debug("domain2='%s'", domain2) + if not domain_helper.is_wanted(domain2): + logger.debug("domain2='%s' is not wanted - SKIPPED!", domain2) continue logger.debug("Fetching nodeinfo from url='%s' ...", url) -- 2.39.5