# Source: fba.git — fba/http/nodeinfo.py (gitweb export)
1 # Copyright (C) 2023 Free Software Foundation
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU Affero General Public License for more details.
12 #
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
15
16 import logging
17
18 from urllib.parse import urlparse
19
20 from fba import csrf
21
22 from fba.helpers import config
23 from fba.helpers import domain as domain_helper
24
25 from fba.http import network
26
27 from fba.models import instances
28
# Recursion/nesting depth counter — defined here but not referenced in this
# module; presumably used (or reserved) by sibling modules — TODO confirm.
_DEPTH = 0

# NOTE(review): basicConfig() in a library module configures the root logger
# as an import side effect; assumed to be the project-wide convention — confirm.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
33
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch the nodeinfo document for *domain*.

    First attempts .well-known auto-discovery via fetch_wellknown_nodeinfo();
    when that yields no usable JSON, falls back to probing a static list of
    common nodeinfo endpoints (newest schema first).

    Parameters:
        domain: Domain name to query (validated by domain_helper.raise_on()).
        path:   Optional path or full http(s) URL restricting which static
                endpoint may be probed; None probes all of them.

    Returns:
        Response dict from the network layer; on failure it carries
        "status_code"/"error_message" (and possibly "exception") keys.

    Raises:
        ValueError: If path is neither None nor a str.
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")

    logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
    data = fetch_wellknown_nodeinfo(domain)

    logger.debug("data[%s](%d)='%s'", type(data), len(data), data)
    if "error_message" not in data and "json" in data and len(data["json"]) > 0:
        # Auto-discovery succeeded - no need to probe static paths.
        logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain)
        instances.set_last_nodeinfo(domain)

        logger.debug("Found data[json]()=%d - EXIT!", len(data['json']))
        return data

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    data = dict()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    # Static fallback endpoints, probed in order (newest schema first).
    request_paths = [
       "/nodeinfo/2.1.json",
       "/nodeinfo/2.1",
       "/nodeinfo/2.0.json",
       "/nodeinfo/2.0",
       "/nodeinfo/1.0.json",
       "/nodeinfo/1.0",
       "/api/v1/instance",
    ]

    for request in request_paths:
        logger.debug("request='%s'", request)
        # Recomputed per iteration on purpose: `path` may be rewritten below
        # once a full URL has been split into its path component.
        http_url  = f"http://{domain}{str(path)}"
        https_url = f"https://{domain}{str(path)}"

        logger.debug("path[%s]='%s',request='%s',http_url='%s',https_url='%s'", type(path), path, request, http_url, https_url)
        if path is None or path in [request, http_url, https_url]:
            logger.debug("path='%s',http_url='%s',https_url='%s'", path, http_url, https_url)
            if path in [http_url, https_url]:
                # A full URL was supplied - reduce it to its path component
                # so it can be matched against the remaining candidates.
                logger.debug("domain='%s',path='%s' has protocol in path, splitting ...", domain, path)
                components = urlparse(path)
                path = components.path

            logger.debug("Fetching request='%s' from domain='%s' ...", request, domain)
            data = network.get_json_api(
                domain,
                request,
                headers,
                (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
            )

            logger.debug("data[]='%s'", type(data))
            if "error_message" not in data and "json" in data:
                logger.debug("Success: request='%s' - Setting detection_mode=STATIC_CHECK ...", request)
                instances.set_last_nodeinfo(domain)
                instances.set_detection_mode(domain, "STATIC_CHECK")
                instances.set_nodeinfo_url(domain, request)
                break

            # BUGFIX: use .get() - this branch is also reached when the
            # response lacks "error_message"/"status_code", which previously
            # raised KeyError here instead of logging the failure.
            logger.warning("Failed fetching nodeinfo from domain='%s',status_code='%s',error_message='%s'", domain, data.get('status_code'), data.get('error_message'))

    logger.debug("data()=%d - EXIT!", len(data))
    return data
111
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Auto-discover and fetch nodeinfo for *domain* via .well-known.

    Queries /.well-known/x-nodeinfo2 and /.well-known/nodeinfo, then walks the
    advertised "links" entries in schema-preference order (2.1 down to 1.0),
    fetching the first acceptable "href" URL. Records detection mode and
    nodeinfo URL on success via the instances model.

    Parameters:
        domain: Domain name to query (validated by domain_helper.raise_on()).

    Returns:
        Response dict from the network layer; on failure it carries
        "status_code"/"error_message" keys, or an empty dict when the
        response had no "json" element.

    Raises:
        Exception: Re-raises any "exception" recorded in the API response.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # "rel" identifiers (no real URLs), in descending schema-version order
    nodeinfo_identifier = [
        "https://nodeinfo.diaspora.software/ns/schema/2.1",
        "http://nodeinfo.diaspora.software/ns/schema/2.1",
        "https://nodeinfo.diaspora.software/ns/schema/2.0",
        "http://nodeinfo.diaspora.software/ns/schema/2.0",
        "https://nodeinfo.diaspora.software/ns/schema/1.1",
        "http://nodeinfo.diaspora.software/ns/schema/1.1",
        "https://nodeinfo.diaspora.software/ns/schema/1.0",
        "http://nodeinfo.diaspora.software/ns/schema/1.0",
    ]

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_wellknown_nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        # CONSISTENCY FIX: "error_message" is now a formatted string (as in
        # fetch_nodeinfo()) instead of a raw type object.
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    data = dict()

    logger.debug("Fetching .well-known info for domain='%s'", domain)
    for path in ["/.well-known/x-nodeinfo2", "/.well-known/nodeinfo"]:
        logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
        data = network.get_json_api(
            domain,
            path,
            headers,
            (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
        )
        logger.debug("data[]='%s'", type(data))

        if "error_message" not in data and "json" in data:
            logger.debug("path='%s' returned valid json()=%d", path, len(data["json"]))
            break

    logger.debug("data[]='%s'", type(data))
    if "exception" in data:
        logger.warning("domain='%s' returned exception '%s'", domain, str(data["exception"]))
        raise data["exception"]
    elif "error_message" in data:
        logger.warning("domain='%s' returned error message: '%s'", domain, data["error_message"])
        return data
    elif "json" not in data:
        logger.warning("domain='%s' returned no 'json' key", domain)
        return dict()

    infos = data["json"]
    logger.debug("infos()=%d has been returned", len(infos))

    if "links" in infos:
        logger.debug("Marking domain='%s' as successfully handled ...", domain)
        instances.set_success(domain)

        logger.debug("Found infos[links]()=%d record(s),", len(infos["links"]))
        for niid in nodeinfo_identifier:
            data = dict()

            logger.debug("Checking niid='%s' ...", niid)
            for link in infos["links"]:
                logger.debug("link[%s]='%s'", type(link), link)
                # IDIOM FIX: "rel" not in link (was: not "rel" in link)
                if not isinstance(link, dict) or "rel" not in link:
                    logger.debug("link[]='%s' is not of type 'dict' or no element 'rel' found - SKIPPED!", type(link))
                    continue
                elif link["rel"] != niid:
                    logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid)
                    continue
                elif "href" not in link:
                    logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"])
                    continue
                elif link["href"] is None:
                    logger.debug("link[href] is None, link[rel]='%s' - SKIPPED!", link["rel"])
                    continue

                # Default is that 'href' has a complete URL, but some hosts don't send that
                logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid)
                url = link["href"]
                components = urlparse(url)

                logger.debug("components[%s]='%s'", type(components), components)
                if components.scheme == "" and components.netloc == "":
                    # Relative path only - prepend scheme and host
                    logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
                    url = f"https://{domain}{url}"
                    components = urlparse(url)
                elif components.netloc == "":
                    # Scheme but no host - insert the queried domain
                    logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain)
                    url = f"{components.scheme}://{domain}{components.path}"
                    components = urlparse(url)

                logger.debug("components.netloc[]='%s'", type(components.netloc))
                if not domain_helper.is_wanted(components.netloc):
                    logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
                    continue

                logger.debug("Fetching nodeinfo from url='%s' ...", url)
                data = network.fetch_api_url(
                    url,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                 )

                logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))
                if "error_message" not in data and "json" in data:
                    logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"])
                    instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                    instances.set_nodeinfo_url(domain, link["href"])

                    logger.debug("Marking domain='%s' as successfully handled ...", domain)
                    instances.set_success(domain)
                    break
                else:
                    logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data))
                    instances.set_last_error(domain, data)

            logger.debug("data()=%d", len(data))
            if "error_message" not in data and "json" in data:
                logger.debug("Auto-discovery successful: domain='%s'", domain)
                break
    elif "server" in infos:
        # x-nodeinfo2 style response - "server" block instead of "links"
        logger.debug("Found infos[server][software]='%s'", infos["server"]["software"])
        instances.set_detection_mode(domain, "AUTO_DISCOVERY")
        instances.set_nodeinfo_url(domain, f"https://{domain}/.well-known/x-nodeinfo2")

        logger.debug("Marking domain='%s' as successfully handled ...", domain)
        instances.set_success(domain)
    else:
        logger.warning("nodeinfo does not contain 'links' or 'server': domain='%s'", domain)

    logger.debug("Returning data[]='%s' - EXIT!", type(data))
    return data