From 2d3a11b76a19d6f5b111e4c2ae4dd3bc079fcb75 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sun, 4 Jun 2023 14:54:47 +0200 Subject: [PATCH] Continued: - introduced json_from_response() which handles decoding errors, e.g. when a server has returned a HTML instead of a JSON which is caused by improper error handling --- api.py | 2 +- fba/fba.py | 38 +++++++++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/api.py b/api.py index af326b2..177eb5f 100644 --- a/api.py +++ b/api.py @@ -174,7 +174,7 @@ def index(request: Request, blockers: int = None, blocked: int = None, reference "reference" : reference, "software" : software, "originator": originator, - "scores" : res.json() + "scores" : fba.json_from_response(res) }) @router.get(config.get("base_url") + "/") diff --git a/fba/fba.py b/fba/fba.py index 4d2d21d..1bab14f 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -18,6 +18,7 @@ import bs4 import hashlib import re import reqto +import requests import json import sqlite3 import sys @@ -558,7 +559,8 @@ def get_peers(domain: str, software: str) -> list: try: res = reqto.get(f"https://{domain}/api/v3/site", headers=api_headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - data = res.json() + data = json_from_response(res) + # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code='{res.status_code}',data[]='{type(data)}'") if not res.ok or res.status_code >= 400: print("WARNING: Could not reach any JSON API:", domain) @@ -595,7 +597,7 @@ def get_peers(domain: str, software: str) -> list: try: res = reqto.get(f"https://{domain}/api/v1/server/{mode}?start={start}&count=100", headers=headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - data = res.json() + data = json_from_response(res) # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code='{res.status_code}',data[]='{type(data)}'") if res.ok and isinstance(data, dict): # DEBUG: print("DEBUG: Success, 
data:", len(data)) @@ -632,16 +634,14 @@ def get_peers(domain: str, software: str) -> list: try: res = reqto.get(f"https://{domain}{get_peers_url}", headers=api_headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - data = res.json() + data = json_from_response(res) + # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code},data[]='{type(data)}'") if not res.ok or res.status_code >= 400: # DEBUG: print(f"DEBUG: Was not able to fetch '{get_peers_url}', trying alternative ...") res = reqto.get(f"https://{domain}/api/v3/site", headers=api_headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - data = json.dumps({}) - if res.text.strip() != "": - data = res.json() - + data = json_from_response(res) # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code},data[]='{type(data)}'") if not res.ok or res.status_code >= 400: print("WARNING: Could not reach any JSON API:", domain) @@ -690,7 +690,7 @@ def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = try: res = reqto.post(f"https://{domain}{path}", data=parameter, headers={**api_headers, **extra_headers}, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - data = res.json() + data = json_from_response(res) # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code},data[]='{type(data)}'") if not res.ok or res.status_code >= 400: print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',parameter()={len(parameter)},res.status_code='{res.status_code}',data[]='{type(data)}'") @@ -738,7 +738,7 @@ def fetch_nodeinfo(domain: str, path: str = None) -> list: # DEBUG: print("DEBUG: Fetching request:", request) res = reqto.get(request, headers=api_headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - data = res.json() + data = json_from_response(res) # DEBUG: print(f"DEBUG: 
def json_from_response(response: requests.models.Response) -> list:
    """Decode the JSON body of a response without raising on bad bodies.

    Servers with improper error handling sometimes answer with an HTML
    page (or an empty body) where JSON is expected. Instead of letting
    the decoding error propagate, an empty list is returned so callers
    can keep relying on their ``res.ok`` / ``isinstance(data, dict)``
    checks.

    Args:
        response: The HTTP response whose body should be decoded.

    Returns:
        Whatever ``response.json()`` decodes (callers in this file check
        for a ``dict``), or an empty list when the body is blank or is
        not valid JSON.

    Raises:
        ValueError: If ``response`` is not a ``requests.models.Response``.
    """
    # DEBUG: print(f"DEBUG: response[]={type(response)} - CALLED!")
    if not isinstance(response, requests.models.Response):
        raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")

    # A blank body can never be valid JSON, so skip the decoder entirely
    if response.text.strip() == "":
        # DEBUG: print("DEBUG: response.text is empty - EXIT!")
        return []

    # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...")
    try:
        data = response.json()
    except ValueError:
        # Depending on the requests version and on whether simplejson is
        # installed, the raised class is not necessarily the stdlib
        # json.JSONDecodeError. All of the variants derive from
        # ValueError, so catching it covers every decoding failure.
        # DEBUG: print("DEBUG: response body is not valid JSON, falling back to empty list")
        data = []

    # DEBUG: print(f"DEBUG: data[]={type(data)} - EXIT!")
    return data