]> git.mxchange.org Git - fba.git/blobdiff - fba/http/federation.py
Continued:
[fba.git] / fba / http / federation.py
index 339b4117241042841c155a26dfa2f54d3b5031d9..a90dc8bc2e43a0a82f385c29396342317179714f 100644 (file)
@@ -340,9 +340,13 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
 
                         logger.debug("components[%s]='%s'", type(components), components)
                         if components.scheme == "" and components.netloc == "":
-                            logger.debug("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
+                            logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
                             url = f"https://{domain}{url}"
                             components = urlparse(url)
+                        elif components.netloc == "":
+                            logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain)
+                            url = f"{components.scheme}://{domain}{components.path}"
+                            components = urlparse(url)
 
                         if not utils.is_domain_wanted(components.netloc):
                             logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
@@ -390,10 +394,16 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
     software = None
 
     logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
-    response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+    response = network.fetch_response(
+        domain, path,
+        network.web_headers,
+        (config.get("connection_timeout"), config.get("read_timeout")),
+        allow_redirects=True
+    )
+    components = urlparse(response.url)
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code < 300 and response.text.find("<html") > 0:
+    if response.ok and response.status_code < 300 and response.text.find("<html") > 0 and components.netloc == domain:
         logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
         doc = bs4.BeautifulSoup(response.text, "html.parser")
 
@@ -418,6 +428,9 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
             if software is not None and software != "":
                 logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
                 instances.set_detection_mode(domain, "SITE_NAME")
+    elif domain != components.netloc:
+        logger.warning("domain='%s' doesn't match components.netloc='%s', maybe redirect to other domain?", domain, components.netloc)
+        instances.set_last_error(domain, f"Redirect from domain='{domain}' to components.netloc='{components.netloc}'")
 
     logger.debug("software[]='%s'", type(software))
     if isinstance(software, str) and software == "":