Continued:

author Roland Häder <roland@mxchange.org>

Mon, 1 Jul 2024 16:21:54 +0000 (18:21 +0200)

committer Roland Häder <roland@mxchange.org>

Mon, 1 Jul 2024 16:21:54 +0000 (18:21 +0200)
author Roland Häder <roland@mxchange.org>
Mon, 1 Jul 2024 16:21:54 +0000 (18:21 +0200)
committer Roland Häder <roland@mxchange.org>
Mon, 1 Jul 2024 16:21:54 +0000 (18:21 +0200)
diff --git a/fba/commands.py b/fba/commands.py

index eed28133583799c7329c25a89b359c56965f94c5..05cdd7ee3a992495286f0c02ca188472083199b9 100644 (file)
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -1031,7 +1031,7 @@ def fetch_instances(args: argparse.Namespace) -> int:
      database.cursor.execute(
          "SELECT domain, origin, software \
  FROM instances \
-WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb', 'smithereen', 'vebinet', 'toki', 'snac', 'biblioreads') \
+WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb', 'smithereen', 'vebinet', 'toki', 'snac', 'biblioreads', 'wordpress') \
  ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC"
      )
  
diff --git a/fba/http/csrf.py b/fba/http/csrf.py

index 57ed5447d2ce6ab6676b01cbb33d7bb8ec5e5105..684390501b5ccf51d2377df3e31e107d4ba3f033 100644 (file)
--- a/fba/http/csrf.py
+++ b/fba/http/csrf.py
@@ -19,6 +19,7 @@ import logging
  import bs4
  import reqto
  import requests
+import validators
  
  from fba.helpers import blacklist
  from fba.helpers import config
@@ -54,7 +55,10 @@ def determine(domain: str, headers: dict) -> dict:
      )
  
      logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code == 200 and response.text.strip() != "" and response.text.find("<html") >= 0 and domain_helper.is_in_url(domain, response.url.split("#")[0]):
+    response_url = response.url.split("#")[0]
+    logger.debug("response_url='%s'", response_url)
+
+    if response.ok and response.status_code == 200 and response.text.strip() != "" and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url):
          # Save cookies
          logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
          cookies.store(domain, response.cookies.get_dict())
@@ -78,10 +82,16 @@ def determine(domain: str, headers: dict) -> dict:
          if tag is not None:
              logger.debug("Adding CSRF token='%s' for domain='%s'", tag["content"], domain)
              reqheaders["X-CSRF-Token"] = tag["content"]
-    elif not domain_helper.is_in_url(domain, response.url.split("#")[0]):
+    elif not validators.url(response_url):
+        logger.warning("response_url='%s' is not valid - Raising exception ...", response_url)
+
+        message = f"Redirect from domain='{domain}' to response_url='{response_url}'"
+        instances.set_last_error(domain, message)
+        raise requests.exceptions.TooManyRedirects(message)
+    elif not domain_helper.is_in_url(domain, response_url):
          logger.warning("domain='%s' doesn't match with response.url='%s', maybe redirect to other domain?", domain, response.url)
  
-        message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
+        message = f"Redirect from domain='{domain}' to response_url='{response_url}'"
          instances.set_last_error(domain, message)
          raise requests.exceptions.TooManyRedirects(message)
  
diff --git a/fba/http/federation.py b/fba/http/federation.py

index b3ad86ce113a094f3292f4130995243425202afa..eae8f8e4b7c599d3c11c8c39f70aac4e758288ab 100644 (file)
--- a/fba/http/federation.py
+++ b/fba/http/federation.py
@@ -294,9 +294,10 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
          allow_redirects=True
      )
  
-    response_url = response.url.split("#")[0]
+    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+    response_url = response.url.split("#")[0]
+    logger.debug("response_url='%s'", response_url)
  
-    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d,response_url='%s'", response.ok, response.status_code, len(response.text), response_url)
      if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url):
          logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
          doc = bs4.BeautifulSoup(response.text, "html.parser")
@@ -340,7 +341,13 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
              if software is not None and software != "":
                  logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
                  instances.set_detection_mode(domain, "SITE_NAME")
-    elif validators.url(response_url) and not domain_helper.is_in_url(domain, response_url):
+    elif not validators.url(response_url):
+        logger.warning("response_url='%s' is not valid - Raising exception ...", response_url)
+
+        message = f"Redirect from domain='{domain}' to response_url='{response_url}'"
+        instances.set_last_error(domain, message)
+        raise requests.exceptions.TooManyRedirects(message)
+    elif not domain_helper.is_in_url(domain, response_url):
          logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)
  
          components = urlparse(response.url)
author	Roland Häder <roland@mxchange.org>
	Mon, 1 Jul 2024 16:21:54 +0000 (18:21 +0200)
committer	Roland Häder <roland@mxchange.org>
	Mon, 1 Jul 2024 16:21:54 +0000 (18:21 +0200)
fba/commands.py		patch | blob | history
fba/http/csrf.py		patch | blob | history
fba/http/federation.py		patch | blob | history