# Source: fba.git — fba/http/nodeinfo.py (gitweb export)
1 # Copyright (C) 2023 Free Software Foundation
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU Affero General Public License for more details.
12 #
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
15
16 import logging
17
18 from urllib.parse import urlparse
19
20 from fba import csrf
21
22 from fba.helpers import config
23 from fba.helpers import domain as domain_helper
24
25 from fba.http import network
26
27 from fba.models import instances
28
# Recursion/nesting depth counter — defined here but not referenced in this
# module; presumably used (or reserved) by sibling modules — TODO confirm.
_DEPTH = 0

# NOTE(review): basicConfig() in a library module configures the root logger
# as an import side effect; assumed to be the project-wide convention — confirm.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
33
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch the nodeinfo document for *domain*.

    First attempts .well-known auto-discovery via fetch_wellknown_nodeinfo();
    when that yields no usable JSON, falls back to probing a static list of
    common nodeinfo endpoints (newest schema first).

    Parameters:
        domain: Domain name to query (validated by domain_helper.raise_on()).
        path:   Optional path or full http(s) URL restricting which static
                endpoint may be probed; None probes all of them.

    Returns:
        Response dict from the network layer; on failure it carries
        "status_code"/"error_message" (and possibly "exception") keys.

    Raises:
        ValueError: If path is neither None nor a str.
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")

    logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
    data = fetch_wellknown_nodeinfo(domain)

    logger.debug("data[%s](%d)='%s'", type(data), len(data), data)
    if "error_message" not in data and "json" in data and len(data["json"]) > 0:
        # Auto-discovery succeeded - no need to probe static paths.
        logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain)
        instances.set_last_nodeinfo(domain)

        logger.debug("Found data[json]()=%d - EXIT!", len(data['json']))
        return data

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    data = dict()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    # Static fallback endpoints, probed in order (newest schema first).
    request_paths = [
       "/nodeinfo/2.1.json",
       "/nodeinfo/2.1",
       "/nodeinfo/2.0.json",
       "/nodeinfo/2.0",
       "/nodeinfo/1.0.json",
       "/nodeinfo/1.0",
       "/api/v1/instance",
    ]

    for request in request_paths:
        logger.debug("request='%s'", request)
        # Recomputed per iteration on purpose: `path` may be rewritten below
        # once a full URL has been split into its path component.
        http_url  = f"http://{domain}{str(path)}"
        https_url = f"https://{domain}{str(path)}"

        logger.debug("path[%s]='%s',request='%s',http_url='%s',https_url='%s'", type(path), path, request, http_url, https_url)
        if path is None or path in [request, http_url, https_url]:
            logger.debug("path='%s',http_url='%s',https_url='%s'", path, http_url, https_url)
            if path in [http_url, https_url]:
                # A full URL was supplied - reduce it to its path component
                # so it can be matched against the remaining candidates.
                logger.debug("domain='%s',path='%s' has protocol in path, splitting ...", domain, path)
                components = urlparse(path)
                path = components.path

            logger.debug("Fetching request='%s' from domain='%s' ...", request, domain)
            data = network.get_json_api(
                domain,
                request,
                headers,
                (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
            )

            logger.debug("data[]='%s'", type(data))
            if "error_message" not in data and "json" in data:
                logger.debug("Success: request='%s' - Setting detection_mode=STATIC_CHECK ...", request)
                instances.set_last_nodeinfo(domain)
                instances.set_detection_mode(domain, "STATIC_CHECK")
                instances.set_nodeinfo_url(domain, request)
                break

            # BUGFIX: use .get() - this branch is also reached when the
            # response lacks "error_message"/"status_code", which previously
            # raised KeyError here instead of logging the failure.
            logger.warning("Failed fetching nodeinfo from domain='%s',status_code='%s',error_message='%s'", domain, data.get('status_code'), data.get('error_message'))

    logger.debug("data()=%d - EXIT!", len(data))
    return data
111
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Auto-discover and fetch nodeinfo for *domain* via .well-known.

    Queries /.well-known/x-nodeinfo2 and /.well-known/nodeinfo, then walks the
    advertised "links" entries in schema-preference order (2.1 down to 1.0),
    fetching the first acceptable "href" URL. Records detection mode and
    nodeinfo URL on success via the instances model.

    Parameters:
        domain: Domain name to query (validated by domain_helper.raise_on()).

    Returns:
        Response dict from the network layer; on failure it carries
        "status_code"/"error_message" keys, or an empty dict when the
        response had no "json" element.

    Raises:
        Exception: Re-raises any "exception" recorded in the API response.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # "rel" identifiers (no real URLs), in descending schema-version order
    nodeinfo_identifier = [
        "https://nodeinfo.diaspora.software/ns/schema/2.1",
        "http://nodeinfo.diaspora.software/ns/schema/2.1",
        "https://nodeinfo.diaspora.software/ns/schema/2.0",
        "http://nodeinfo.diaspora.software/ns/schema/2.0",
        "https://nodeinfo.diaspora.software/ns/schema/1.1",
        "http://nodeinfo.diaspora.software/ns/schema/1.1",
        "https://nodeinfo.diaspora.software/ns/schema/1.0",
        "http://nodeinfo.diaspora.software/ns/schema/1.0",
    ]

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_wellknown_nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        # CONSISTENCY FIX: "error_message" is now a formatted string (as in
        # fetch_nodeinfo()) instead of a raw type object.
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    data = dict()

    logger.debug("Fetching .well-known info for domain='%s'", domain)
    for path in ["/.well-known/x-nodeinfo2", "/.well-known/nodeinfo"]:
        logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
        data = network.get_json_api(
            domain,
            path,
            headers,
            (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
        )
        logger.debug("data[]='%s'", type(data))

        if "error_message" not in data and "json" in data:
            logger.debug("path='%s' returned valid json()=%d", path, len(data["json"]))
            break

    logger.debug("data[]='%s'", type(data))
    if "exception" in data:
        logger.warning("domain='%s' returned exception '%s'", domain, str(data["exception"]))
        raise data["exception"]
    elif "error_message" in data:
        logger.warning("domain='%s' returned error message: '%s'", domain, data["error_message"])
        return data
    elif "json" not in data:
        logger.warning("domain='%s' returned no 'json' key", domain)
        return dict()

    infos = data["json"]
    logger.debug("infos()=%d has been returned", len(infos))

    if "links" in infos:
        logger.debug("Marking domain='%s' as successfully handled ...", domain)
        instances.set_success(domain)

        logger.debug("Found infos[links]()=%d record(s),", len(infos["links"]))
        for niid in nodeinfo_identifier:
            data = dict()

            logger.debug("Checking niid='%s' ...", niid)
            for link in infos["links"]:
                logger.debug("link[%s]='%s'", type(link), link)
                # IDIOM FIX: "rel" not in link (was: not "rel" in link)
                if not isinstance(link, dict) or "rel" not in link:
                    logger.debug("link[]='%s' is not of type 'dict' or no element 'rel' found - SKIPPED!", type(link))
                    continue
                elif link["rel"] != niid:
                    logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid)
                    continue
                elif "href" not in link:
                    logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"])
                    continue
                elif link["href"] is None:
                    logger.debug("link[href] is None, link[rel]='%s' - SKIPPED!", link["rel"])
                    continue

                # Default is that 'href' has a complete URL, but some hosts don't send that
                logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid)
                url = link["href"]
                components = urlparse(url)

                logger.debug("components[%s]='%s'", type(components), components)
                if components.scheme == "" and components.netloc == "":
                    # Relative path only - prepend scheme and host
                    logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
                    url = f"https://{domain}{url}"
                    components = urlparse(url)
                elif components.netloc == "":
                    # Scheme but no host - insert the queried domain
                    logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain)
                    url = f"{components.scheme}://{domain}{components.path}"
                    components = urlparse(url)

                logger.debug("components.netloc[]='%s'", type(components.netloc))
                if not domain_helper.is_wanted(components.netloc):
                    logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
                    continue

                logger.debug("Fetching nodeinfo from url='%s' ...", url)
                data = network.fetch_api_url(
                    url,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                 )

                logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))
                if "error_message" not in data and "json" in data:
                    logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"])
                    instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                    instances.set_nodeinfo_url(domain, link["href"])

                    logger.debug("Marking domain='%s' as successfully handled ...", domain)
                    instances.set_success(domain)
                    break
                else:
                    logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data))
                    instances.set_last_error(domain, data)

            logger.debug("data()=%d", len(data))
            if "error_message" not in data and "json" in data:
                logger.debug("Auto-discovery successful: domain='%s'", domain)
                break
    elif "server" in infos:
        # x-nodeinfo2 style response - "server" block instead of "links"
        logger.debug("Found infos[server][software]='%s'", infos["server"]["software"])
        instances.set_detection_mode(domain, "AUTO_DISCOVERY")
        instances.set_nodeinfo_url(domain, f"https://{domain}/.well-known/x-nodeinfo2")

        logger.debug("Marking domain='%s' as successfully handled ...", domain)
        instances.set_success(domain)
    else:
        logger.warning("nodeinfo does not contain 'links' or 'server': domain='%s'", domain)

    logger.debug("Returning data[]='%s' - EXIT!", type(data))
    return data