From 315af95a95eb8e315040db88ab1314e65e3bb05d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 20 May 2024 14:49:13 +0200 Subject: [PATCH] Continued: - variable `response_url` is the whole URL, possibly stripped of its anchor (#) part, but it is NEVER a valid domain (oops) --- fba/http/federation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fba/http/federation.py b/fba/http/federation.py index d9101c6..b3ad86c 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -297,7 +297,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: response_url = response.url.split("#")[0] logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d,response_url='%s'", response.ok, response.status_code, len(response.text), response_url) - if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") >= 0 and validators.domain(response_url) and domain_helper.is_in_url(domain, response_url): + if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url): logger.debug("Parsing response.text()=%d Bytes ...", len(response.text)) doc = bs4.BeautifulSoup(response.text, "html.parser") @@ -340,7 +340,7 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: if software is not None and software != "": logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software) instances.set_detection_mode(domain, "SITE_NAME") - elif validators.domain(response_url) and not domain_helper.is_in_url(domain, response_url): + elif validators.url(response_url) and not domain_helper.is_in_url(domain, response_url): logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url) components = urlparse(response.url) -- 2.39.5