From 7bfb53adcb8b24d2ea4eda469ee8a5aa7eee36e0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sun, 22 Sep 2024 01:13:57 +0200 Subject: [PATCH] Continued: - some parked domains return "200 OK" for all requests. This is understandable from their point of view, but it isn't standards-compliant, as 200 means the document was found, not that some fake content is being returned --- fba/helpers/json.py | 15 ++++++++++++++- fba/http/network.py | 5 ++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fba/helpers/json.py b/fba/helpers/json.py index a3e3fe5..6d0454c 100644 --- a/fba/helpers/json.py +++ b/fba/helpers/json.py @@ -22,6 +22,19 @@ import requests logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +def is_json_response(response: requests.models.Response) -> bool: + logger.debug("response[]='%s' - CALLED!", type(response)) + + if not isinstance(response, requests.models.Response): + raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'") + elif not response.ok or response.status_code > 200: + raise ValueError(f"response.ok='{response.ok}',response.status_code={response.status_code},response.reason='{response.reason}' but function was invoked") + + is_json = response.headers.get("content-type") is None or response.headers.get("content-type").split(";")[0] in ["*/*", "application/json", "application/jrd+json", "application/activity+json"] + + logger.debug("is_json='%s' - EXIT!", is_json) + return is_json + def from_response(response: requests.models.Response) -> any: logger.debug("response[]='%s' - CALLED!", type(response)) @@ -29,7 +42,7 @@ def from_response(response: requests.models.Response) -> any: raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'") elif not response.ok or response.status_code > 200: raise ValueError(f"response.ok='{response.ok}',response.status_code={response.status_code},response.reason='{response.reason}' but function was invoked") - 
elif response.text.strip() != "" and response.headers.get("content-type") is not None and response.headers.get("content-type").split(";")[0] not in ["*/*", "application/json", "application/jrd+json", "application/activity+json"]: + elif response.text.strip() != "" and not is_json_response(response): logger.warning("response.headers[content-type]='%s' is not a JSON type, below json() invocation may raise an exception", response.headers.get("content-type")) data = list() diff --git a/fba/http/network.py b/fba/http/network.py index 6860001..15ca9d3 100644 --- a/fba/http/network.py +++ b/fba/http/network.py @@ -198,7 +198,10 @@ def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict: raise exception logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s'", response.ok, response.status_code, response.reason) - if response.ok and response.status_code == 200: + if not json_helper.is_json_response(response): + json_reply["status_code"] = 999 + json_reply["error_message"] = f"content-type='{response.headers.get('content-type')}' is not a JSON response!" + elif response.ok and response.status_code == 200: logger.debug("Parsing JSON response from domain='%s',path='%s' ...", domain, path) json_reply["json"] = json_helper.from_response(response) logger.debug("json_reply[json][]='%s'", type(json_reply["json"])) -- 2.39.5