From 2434c7a1ed2effb94220895a396d7dfbce945891 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 3 Jul 2023 14:19:39 +0200 Subject: [PATCH] Continued: - reset software, detection mode and nodeinfo URL to None when a redirect to another domain is detected - yes, some people have moved their instance to a subdomain and now redirect their traffic there - previously this caused another instance to be registered under the wrong domain name - this fix solves that, please run ./fba.py update_nodeinfo - added config key recheck_nodeinfo --- config.defaults.json | 1 + fba/commands.py | 9 +++++---- fba/csrf.py | 5 ++++- fba/http/federation.py | 10 +++++++++- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/config.defaults.json b/config.defaults.json index e378f47..7f7d37c 100644 --- a/config.defaults.json +++ b/config.defaults.json @@ -17,6 +17,7 @@ "slogan" : "### Your footer slogan ###", "recheck_instance" : 604800, "recheck_block" : 43200, + "recheck_nodeinfo" : 604800, "misskey_limit" : 100, "error_log_cleanup" : 604800, "write_error_log" : "true", diff --git a/fba/commands.py b/fba/commands.py index 5895353..231ec4e 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -100,10 +100,11 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int: # No CSRF by default, you don't have to add network.api_headers by yourself here headers = tuple() + domain = "pixelfed.org" try: - logger.debug("Checking CSRF from pixelfed.org") - headers = csrf.determine("pixelfed.org", dict()) + logger.debug("Checking CSRF from domain='%s' ...", domain) + headers = csrf.determine(domain, dict()) except network.exceptions as exception: logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__) return list() @@ -111,7 +112,7 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int: try: logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers)) fetched = network.get_json_api( -
"pixelfed.org", + domain, "/api/v1/servers/all.json?scope=All&country=all&language=all", headers, (config.get("connection_timeout"), config.get("read_timeout")) @@ -1384,7 +1385,7 @@ def update_nodeinfo(args: argparse.Namespace) -> int: database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software]) else: logger.info("Fetching domains for recently updated ...") - database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL AND software IS NULL AND last_status_code < 999", [time.time() - config.get("recheck_block")]) + database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")]) domains = database.cursor.fetchall() diff --git a/fba/csrf.py b/fba/csrf.py index d5a3ecc..2ed3613 100644 --- a/fba/csrf.py +++ b/fba/csrf.py @@ -20,6 +20,7 @@ from urllib.parse import urlparse import bs4 import reqto +import requests from fba.helpers import config from fba.helpers import cookies @@ -71,7 +72,9 @@ def determine(domain: str, headers: dict) -> dict: reqheaders["X-CSRF-Token"] = tag["content"] elif domain != components.netloc: logger.warning("domain='%s' doesn't match components.netloc='%s', maybe redirect to other domain?", domain, components.netloc) - instances.set_last_error(domain, f"Redirect from domain='{domain}' to components.netloc='{components.netloc}'") + message = f"Redirect from domain='{domain}' to components.netloc='{components.netloc}'" + instances.set_last_error(domain, message) + raise requests.exceptions.TooManyRedirects(message) logger.debug("reqheaders()=%d - EXIT!", len(reqheaders)) return reqheaders diff --git a/fba/http/federation.py b/fba/http/federation.py index a90dc8b..07cbe0a 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -227,6 +227,9 @@ def fetch_nodeinfo(domain: str, path: str = None) -> dict: except network.exceptions as exception: 
logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__) instances.set_last_error(domain, exception) + instances.set_software(domain, None) + instances.set_detection_mode(domain, None) + instances.set_nodeinfo_url(domain, None) return { "status_code" : 500, "error_message": f"exception[{type(exception)}]='{str(exception)}'", @@ -430,7 +433,12 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: instances.set_detection_mode(domain, "SITE_NAME") elif domain != components.netloc: logger.warning("domain='%s' doesn't match components.netloc='%s', maybe redirect to other domain?", domain, components.netloc) - instances.set_last_error(domain, f"Redirect from domain='{domain}' to components.netloc='{components.netloc}'") + message = f"Redirect from domain='{domain}' to components.netloc='{components.netloc}'" + instances.set_last_error(domain, message) + instances.set_software(domain, None) + instances.set_detection_mode(domain, None) + instances.set_nodeinfo_url(domain, None) + raise requests.exceptions.TooManyRedirects(message) logger.debug("software[]='%s'", type(software)) if isinstance(software, str) and software == "": -- 2.39.5