# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import logging

from urllib.parse import urlparse

from fba.helpers import config
from fba.helpers import domain as domain_helper

from fba.http import csrf
from fba.http import network

from fba.models import instances
# Module-level logger, named after this module so records can be filtered
# per sub-system.
logger = logging.getLogger(__name__)

# Configure the root handler once at import time: emit INFO and above.
logging.basicConfig(level=logging.INFO)
def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict:
    """Fetch nodeinfo from the given domain.

    When no explicit path is given and update_mode is enabled, well-known
    auto-discovery (fetch_wellknown_nodeinfo) is tried first.  Otherwise - or
    when discovery yields nothing usable - a static list of common nodeinfo
    paths is probed in order; the first successful JSON response wins.

    Parameters:
        domain      -- domain name to fetch nodeinfo from
        path        -- optional explicit path (relative or absolute URL form)
        update_mode -- whether instance bookkeeping may be updated on success

    Returns the dict produced by network.get_json_api() or
    fetch_wellknown_nodeinfo(): a "json" element on success, or
    "status_code"/"error_message"/"exception" elements on failure.

    Raises ValueError on bad parameter types and re-raises any exception the
    well-known lookup carried back in its "exception" element.
    """
    logger.debug("domain='%s',path='%s',update_mode='%s' - CALLED!", domain, path, update_mode)
    domain_helper.raise_on(domain)

    if not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
    elif not isinstance(update_mode, bool) and update_mode is not None:
        raise ValueError(f"Parameter update_mode[]='{type(update_mode)}' is not of type 'bool'")

    if path is None and update_mode:
        logger.debug("Fetching well-known nodeinfo from domain='%s' ...", domain)
        data = fetch_wellknown_nodeinfo(domain)

        logger.debug("data[%s](%d)='%s'", type(data), len(data), data)
        if "exception" in data:
            logger.warning("Exception returned: '%s', raising again ...", type(data["exception"]))
            raise data["exception"]
        elif "error_message" not in data and "json" in data and len(data["json"]) > 0:
            logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain)
            instances.set_last_nodeinfo(domain)

            logger.debug("Found data[json]()=%d - EXIT!", len(data['json']))
            return data

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    # Static probe list, most recent schema first.
    # NOTE(review): this list was reconstructed from the standard nodeinfo
    # endpoint layout - confirm exact entries/order against upstream.
    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0.json",
        "/nodeinfo/1.0",
    ]

    data = dict()

    for request in request_paths:
        logger.debug("request='%s'", request)
        # Absolute-URL forms of the caller-supplied path, so an explicit path
        # may be matched whether given relative or fully qualified.
        http_url  = f"http://{domain}{str(path) if path is not None else '/'}"
        https_url = f"https://{domain}{str(path) if path is not None else '/'}"

        logger.debug("path[%s]='%s',request='%s',http_url='%s',https_url='%s'", type(path), path, request, http_url, https_url)
        if path is None or path in [request, http_url, https_url]:
            logger.debug("Fetching request='%s' from domain='%s' ...", request, domain)
            data = network.get_json_api(
                domain,
                request,
                headers,
                (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
            )

            logger.debug("data[]='%s'", type(data))
            if "error_message" not in data and "json" in data:
                logger.debug("Success: request='%s' - Setting detection_mode=STATIC_CHECK ...", request)
                instances.set_last_nodeinfo(domain)
                instances.set_detection_mode(domain, "STATIC_CHECK")
                # BUGFIX: was a plain literal "https://{domain}{request}" with a
                # missing f-prefix, storing the placeholder text verbatim.
                instances.set_nodeinfo_url(domain, f"https://{domain}{request}")
                break
            else:
                logger.warning("Failed fetching nodeinfo from domain='%s',status_code='%s',error_message='%s'", domain, data['status_code'], data['error_message'])

    logger.debug("data()=%d - EXIT!", len(data))
    return data
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Auto-discover nodeinfo for a domain via its .well-known endpoints.

    Fetches /.well-known/x-nodeinfo2 then /.well-known/nodeinfo, and on a
    "links"-style document follows the best-matching "rel" schema link to the
    real nodeinfo URL.  Updates instance bookkeeping (detection mode, nodeinfo
    URL, success/error state) as a side effect.

    Parameters:
        domain -- domain name to discover nodeinfo for

    Returns the dict produced by the last network call: a "json" element on
    success, or "status_code"/"error_message"/"exception" elements on failure;
    an empty dict when the response had no usable JSON at all.

    Raises any exception carried back in a response's "exception" element.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # "rel" identifiers (no real URLs), newest schema first - the outer loop
    # below prefers earlier entries.
    nodeinfo_identifier = [
        "https://nodeinfo.diaspora.software/ns/schema/2.1",
        "http://nodeinfo.diaspora.software/ns/schema/2.1",
        "https://nodeinfo.diaspora.software/ns/schema/2.0",
        "http://nodeinfo.diaspora.software/ns/schema/2.0",
        "https://nodeinfo.diaspora.software/ns/schema/1.1",
        "http://nodeinfo.diaspora.software/ns/schema/1.1",
        "https://nodeinfo.diaspora.software/ns/schema/1.0",
        "http://nodeinfo.diaspora.software/ns/schema/1.0",
    ]

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_wellknown_nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            # CONSISTENCY FIX: was the bare type object `type(exception)`;
            # now a formatted string like fetch() returns.
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    data = dict()

    logger.debug("Fetching .well-known info for domain='%s'", domain)
    for path in ["/.well-known/x-nodeinfo2", "/.well-known/nodeinfo"]:
        logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
        data = network.get_json_api(
            domain,
            path,
            headers,
            (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" not in data and "json" in data and len(data["json"]) > 0:
            logger.debug("path='%s' returned valid json()=%d", path, len(data["json"]))
            break

    logger.debug("data[]='%s'", type(data))
    if "exception" in data:
        logger.warning("domain='%s' returned exception '%s'", domain, str(data["exception"]))
        raise data["exception"]
    elif "error_message" in data:
        logger.warning("domain='%s' returned error message: '%s'", domain, data["error_message"])
        return data
    elif "json" not in data:
        logger.warning("domain='%s' returned no 'json' key", domain)
        return dict()

    infos = data["json"]
    logger.debug("infos()=%d has been returned", len(infos))

    if "links" in infos:
        logger.debug("Marking domain='%s' as successfully handled ...", domain)
        instances.set_success(domain)

        logger.debug("Found infos[links]()=%d record(s),", len(infos["links"]))
        for niid in nodeinfo_identifier:
            data = dict()

            logger.debug("Checking niid='%s' ...", niid)
            for link in infos["links"]:
                logger.debug("link[%s]='%s'", type(link), link)
                if not isinstance(link, dict) or not "rel" in link:
                    logger.debug("link[]='%s' is not of type 'dict' or no element 'rel' found - SKIPPED!", type(link))
                    continue
                elif link["rel"] != niid:
                    logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid)
                    continue
                elif "href" not in link:
                    logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"])
                    continue
                elif link["href"] is None:
                    logger.debug("link[href] is None, link[rel]='%s' - SKIPPED!", link["rel"])
                    continue

                # Default is that 'href' has a complete URL, but some hosts don't send that
                logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid)
                url = link["href"].lower()
                components = urlparse(url)

                logger.debug("components[%s]='%s'", type(components), components)
                if components.scheme == "" and components.netloc == "":
                    logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
                    url = f"https://{domain}{url}"
                    components = urlparse(url)
                elif components.netloc == "":
                    logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain)
                    url = f"{components.scheme}://{domain}{components.path}"
                    components = urlparse(url)

                # Strip any :port suffix before the wanted-domain check.
                domain2 = components.netloc.lower().split(":")[0]
                logger.debug("domain2='%s'", domain2)
                if not domain_helper.is_wanted(domain2):
                    logger.debug("domain2='%s' is not wanted - SKIPPED!", domain2)
                    continue

                logger.debug("Fetching nodeinfo from url='%s' ...", url)
                data = network.fetch_api_url(
                    url,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                )

                logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))
                if "error_message" not in data and "json" in data:
                    logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"])
                    instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                    instances.set_nodeinfo_url(domain, link["href"])

                    logger.debug("Marking domain='%s' as successfully handled ...", domain)
                    instances.set_success(domain)
                    break
                else:
                    logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data))
                    instances.set_last_error(domain, data)

            logger.debug("data()=%d", len(data))
            if "error_message" not in data and "json" in data:
                logger.debug("Auto-discovery successful: domain='%s'", domain)
                break
    elif "server" in infos:
        # x-nodeinfo2 style document: the payload itself carries the data.
        logger.debug("Found infos[server][software]='%s'", infos["server"]["software"])
        instances.set_detection_mode(domain, "AUTO_DISCOVERY")
        instances.set_nodeinfo_url(domain, f"https://{domain}/.well-known/x-nodeinfo2")

        logger.debug("Marking domain='%s' as successfully handled ...", domain)
        instances.set_success(domain)
    else:
        logger.warning("nodeinfo does not contain 'links' or 'server': domain='%s'", domain)

    logger.debug("Returning data[]='%s' - EXIT!", type(data))
    return data