1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 from fba import blacklist
22 from fba import config
24 from fba import instances
25 from fba import network
27 from fba.helpers import tidyup
29 from fba.networks import lemmy
30 from fba.networks import misskey
31 from fba.networks import peertube
# "rel" identifiers (no real URLs).
# fetch_wellknown_nodeinfo() compares the "rel" value of each entry in the
# /.well-known/nodeinfo "links" array against this list; both the https and
# the http spelling of every schema version are accepted.
nodeinfo_identifier = [
    "https://nodeinfo.diaspora.software/ns/schema/2.1",
    "https://nodeinfo.diaspora.software/ns/schema/2.0",
    "https://nodeinfo.diaspora.software/ns/schema/1.1",
    "https://nodeinfo.diaspora.software/ns/schema/1.0",
    "http://nodeinfo.diaspora.software/ns/schema/2.1",
    "http://nodeinfo.diaspora.software/ns/schema/2.0",
    "http://nodeinfo.diaspora.software/ns/schema/1.1",
    "http://nodeinfo.diaspora.software/ns/schema/1.0",
]
def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
    """Register `domain` if it is unknown, then register every usable peer it reports.

    Args:
        domain: Instance whose peers are fetched.
        origin: Handed to instances.add() together with `domain`; may be None.
        software: Software name used by fetch_peers() to pick a strategy. When
            None it is determined through determine_software().
        script: Handed through to instances.add().
        path: Optional nodeinfo path, handed to determine_software() and
            instances.add().

    Raises:
        ValueError: If a parameter has the wrong type or `domain`/`script` is empty.
    """
    print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"Parameter software[]={type(software)} is not 'str'")
    elif not isinstance(script, str):
        raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
    elif script == "":
        # The last guard used to repeat the "'domain' is empty" error, so an
        # empty script was never reported as such.
        raise ValueError("Parameter 'script' is empty")

    # Determined only after ALL parameters were validated; as a branch of the
    # validation chain it used to bypass the remaining checks.
    if software is None:
        print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
        software = determine_software(domain, path)
        print(f"DEBUG: Determined software='{software}' for domain='{domain}'")

    if not instances.is_registered(domain):
        print("DEBUG: Adding new domain:", domain, origin)
        instances.add(domain, origin, script, path)

    print("DEBUG: Fetching instances for domain:", domain, software)
    peerlist = fetch_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending_instance_data(domain):
        print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...")
        instances.update_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
    for instance in peerlist:
        if instance is None:
            # Skip "None" types as tidyup.domain() cannot parse them
            continue

        print(f"DEBUG: instance='{instance}' - BEFORE")
        instance = tidyup.domain(instance)
        print(f"DEBUG: instance='{instance}' - AFTER")

        if instance == "":
            print("WARNING: Empty instance after tidyup.domain(), domain:", domain)
            continue
        elif not validators.domain(instance.split("/")[0]):
            # Only the host part (before the first slash) is validated
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
            continue
        elif blacklist.is_blacklisted(instance):
            print("DEBUG: instance is blacklisted:", instance)
            continue

        print("DEBUG: Handling instance:", instance)
        try:
            if not instances.is_registered(instance):
                print("DEBUG: Adding new instance:", instance, domain)
                instances.add(instance, domain, script)
        except Exception as exception:
            # One failing peer must not abort the whole run. Exception (not
            # BaseException) so that Ctrl-C and SystemExit still propagate.
            print(f"ERROR: instance='{instance}',exception[{type(exception)}]:'{str(exception)}'")
            continue

    print("DEBUG: EXIT!")
def fetch_peers(domain: str, software: str) -> list:
    """Return the peers reported by `domain`.

    misskey, lemmy and peertube are delegated to their network modules. Every
    other software is queried through "/api/v1/instance/peers" first and
    through "/api/v3/site" (key "federated_instances") when that fails.

    Args:
        domain: Instance to query.
        software: Software name of the instance, or None.

    Returns:
        List of peers; empty when neither API delivered usable data.

    Raises:
        ValueError: If a parameter has the wrong type or `domain` is empty.
    """
    # Validate before the first debug line: it calls len(domain), which would
    # turn a wrong type into a TypeError instead of the ValueError below.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"software[]={type(software)} is not 'str'")

    print(f"DEBUG: domain({len(domain)})={domain},software={software} - CALLED!")

    if software == "misskey":
        print(f"DEBUG: Invoking misskey.fetch_peers({domain}) ...")
        return misskey.fetch_peers(domain)
    elif software == "lemmy":
        print(f"DEBUG: Invoking lemmy.fetch_peers({domain}) ...")
        return lemmy.fetch_peers(domain)
    elif software == "peertube":
        print(f"DEBUG: Invoking peertube.fetch_peers({domain}) ...")
        return peertube.fetch_peers(domain)

    print(f"DEBUG: Fetching peers from '{domain}',software='{software}' ...")
    peers = list()
    timeout = (config.get("connection_timeout"), config.get("read_timeout"))

    response = network.fetch_response(domain, "/api/v1/instance/peers", network.api_headers, timeout)
    print(f"DEBUG: response[]='{type(response)}'")

    data = network.json_from_response(response)
    print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")

    if not response.ok or response.status_code >= 400:
        print("DEBUG: Was not able to fetch peers, trying alternative ...")
        response = network.fetch_response(domain, "/api/v3/site", network.api_headers, timeout)

        data = network.json_from_response(response)
        print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
        if not response.ok or response.status_code >= 400:
            print("WARNING: Could not reach any JSON API:", domain)
            instances.update_last_error(domain, response)
        elif response.ok and isinstance(data, list):
            print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
            # NOTE(review): the statement that followed this message is not
            # visible in the reviewed text. A list is not a usable
            # "/api/v3/site" answer, so it is recorded as an error and no
            # peers are returned - confirm against the repository.
            instances.update_last_error(domain, response)
        elif isinstance(data, dict) and "federated_instances" in data:
            print(f"DEBUG: Found federated_instances for domain='{domain}'")
            peers = peers + add_peers(data["federated_instances"])
            print("DEBUG: Added instance(s) to peers")
        else:
            print("WARNING: JSON response does not contain 'federated_instances':", domain)
            instances.update_last_error(domain, response)
    elif isinstance(data, list):
        print("DEBUG: Querying API was successful:", domain, len(data))
        peers = data
    else:
        # A successful response that is not a JSON array carries no peers;
        # returning it as-is would make callers iterate over dict keys.
        print(f"WARNING: domain='{domain}' returned no list of peers, data[]='{type(data)}'")
        instances.update_last_error(domain, response)

    print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
    instances.set_data("total_peers", domain, len(peers))

    print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
    instances.update_last_instance_fetch(domain)

    print("DEBUG: Returning peers[]:", type(peers))
    return peers
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch the nodeinfo document of `domain`.

    Auto-discovery through fetch_wellknown_nodeinfo() is tried first. When it
    yields nothing, a fixed list of request paths is probed; the first one
    answering with a JSON object wins and is stored as "nodeinfo_url" with
    detection mode "STATIC_CHECK".

    Args:
        domain: Instance to query.
        path: When set and non-empty, only the identical request path is probed.

    Returns:
        The decoded JSON of the last request made. Callers must still check
        that it is a non-empty dict, as it may be an error document.

    Raises:
        ValueError: If a parameter has the wrong type or `domain` is empty.
    """
    print(f"DEBUG: domain='{domain}',path={path} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
    nodeinfo = fetch_wellknown_nodeinfo(domain)

    print(f"DEBUG: nodeinfo({len(nodeinfo)})={nodeinfo}")
    if len(nodeinfo) > 0:
        # This message lacked the f-prefix (and had a stray parenthesis), so
        # the placeholder itself was printed instead of the length.
        print(f"DEBUG: nodeinfo()={len(nodeinfo)} - EXIT!")
        return nodeinfo

    # NOTE(review): only the two ".json" entries are visible in the reviewed
    # text; the remaining entries are assumed - confirm against the repository.
    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0",
        "/api/v1/instance",
    ]
    timeout = (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))

    # Pre-set so the function still returns something when `path` matches
    # none of the request paths and therefore no request is made at all.
    data = {}

    for request in request_paths:
        if path is not None and path != "" and path != request:
            print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
            continue

        print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
        response = network.fetch_response(domain, request, network.api_headers, timeout)

        data = network.json_from_response(response)
        print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
        if response.ok and isinstance(data, dict):
            print("DEBUG: Success:", request)
            instances.set_data("detection_mode", domain, "STATIC_CHECK")
            instances.set_data("nodeinfo_url", domain, request)
            break
        elif response.ok and isinstance(data, list):
            print(f"UNSUPPORTED: domain='{domain}' returned a list: '{data}'")
            # NOTE(review): the statement that followed this message is not
            # visible in the reviewed text; the next request path is tried
            # here - confirm against the repository.
            continue
        elif not response.ok or response.status_code >= 400:
            print("WARNING: Failed fetching nodeinfo from domain:", domain)
            instances.update_last_error(domain, response)
            continue

    print(f"DEBUG: data()={len(data)} - EXIT!")
    return data
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Discover and fetch nodeinfo through "/.well-known/nodeinfo".

    Every entry of the "links" array whose "rel" is listed in
    nodeinfo_identifier is fetched until one answers with a JSON object. That
    URL is stored as "nodeinfo_url" with detection mode "AUTO_DISCOVERY".

    Args:
        domain: Instance to query.

    Returns:
        The decoded JSON of the last request made. This is the nodeinfo
        document on success, otherwise whatever the last request delivered
        (possibly the well-known document itself).

    Raises:
        ValueError: If `domain` is not a string or is empty.
    """
    print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    print("DEBUG: Fetching .well-known info for domain:", domain)
    response = network.fetch_response(domain, "/.well-known/nodeinfo", network.api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))

    data = network.json_from_response(response)
    print("DEBUG: domain,response.ok,data[]:", domain, response.ok, type(data))
    if response.ok and isinstance(data, dict):
        nodeinfo = data
        print("DEBUG: Found entries:", len(nodeinfo), domain)
        if "links" in nodeinfo:
            print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"]))
            for link in nodeinfo["links"]:
                # The document comes from a remote server, so a malformed
                # entry must not abort discovery with a KeyError/TypeError.
                if not isinstance(link, dict) or "rel" not in link or "href" not in link:
                    print(f"WARNING: Malformed link='{link}' from domain='{domain}' - SKIPPED!")
                    continue

                print("DEBUG: rel,href:", link["rel"], link["href"])
                if link["rel"] in nodeinfo_identifier:
                    print("DEBUG: Fetching nodeinfo from:", link["href"])
                    response = fba.fetch_url(link["href"], network.api_headers, (config.get("connection_timeout"), config.get("read_timeout")))

                    data = network.json_from_response(response)
                    print("DEBUG: href,response.ok,response.status_code:", link["href"], response.ok, response.status_code)
                    if response.ok and isinstance(data, dict):
                        print("DEBUG: Found JSON nodeinfo():", len(data))
                        instances.set_data("detection_mode", domain, "AUTO_DISCOVERY")
                        instances.set_data("nodeinfo_url", domain, link["href"])
                        break
                else:
                    print("WARNING: Unknown 'rel' value:", domain, link["rel"])
        else:
            print("WARNING: nodeinfo does not contain 'links':", domain)

    print("DEBUG: Returning data[]:", type(data))
    return data
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Guess the software of `domain` from the HTML served at `path`.

    The content of <meta name="generator"> is preferred, with
    <meta property="og:site_name"> as fallback. Version numbers and phrases
    such as " powered by ", " hosted on ", " by " and " see " are stripped
    from the result.

    Args:
        domain: Instance to query.
        path: Path of the page to inspect.

    Returns:
        The cleaned software name, or None when nothing could be detected.

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    # Validate before the first debug line: it calls len(domain), which would
    # turn a wrong type into a TypeError instead of the ValueError below.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
    print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!")
    software = None

    print(f"DEBUG: Fetching path='{path}' from '{domain}' ...")
    response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

    print("DEBUG: domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text))
    if response.ok and response.status_code < 300 and len(response.text) > 0:
        print("DEBUG: Search for <meta name='generator'>:", domain)
        doc = bs4.BeautifulSoup(response.text, "html.parser")

        print("DEBUG: doc[]:", type(doc))
        generator = doc.find("meta", {"name": "generator"})
        site_name = doc.find("meta", {"property": "og:site_name"})

        print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
        if isinstance(generator, bs4.element.Tag):
            print("DEBUG: Found generator meta tag:", domain)
            software = tidyup.domain(generator.get("content"))
            print(f"INFO: domain='{domain}' is generated by '{software}'")
            instances.set_data("detection_mode", domain, "GENERATOR")
            fba.remove_pending_error(domain)
        elif isinstance(site_name, bs4.element.Tag):
            print("DEBUG: Found property=og:site_name:", domain)
            # Was assigned to a misspelt name ("sofware"), which silently
            # discarded the detected value and left `software` at None.
            software = tidyup.domain(site_name.get("content"))
            print(f"INFO: domain='{domain}' has og:site_name='{software}'")
            instances.set_data("detection_mode", domain, "SITE_NAME")
            fba.remove_pending_error(domain)

    print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and software == "":
        print(f"DEBUG: Corrected empty string to None for software of domain='{domain}'")
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = fba.remove_version(software)

    print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and " powered by " in software:
        print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = fba.remove_version(fba.strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        print(f"DEBUG: software='{software}' has 'hosted on' in it")
        software = fba.remove_version(fba.strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        print(f"DEBUG: software='{software}' has ' by ' in it")
        software = fba.strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        print(f"DEBUG: software='{software}' has ' see ' in it")
        software = fba.strip_until(software, " see ")

    print(f"DEBUG: software='{software}' - EXIT!")
    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine which software `domain` runs.

    The name is read from the nodeinfo document ([software][name]). Known
    forks are mapped to their upstream project (akkoma/rebased -> pleroma,
    hometown/ecko -> mastodon, calckey/groundpolis/foundkey/cherrypick/
    meisskey -> misskey). Whenever nodeinfo is missing, is an error document
    or names no software, fetch_generator_from_path() is used instead.

    Args:
        domain: Instance to query.
        path: Optional nodeinfo path, handed to fetch_nodeinfo().

    Returns:
        The software name, or None when it could not be determined.

    Raises:
        ValueError: If a parameter has the wrong type or `domain` is empty.
    """
    # Validate before the first debug line: it calls len(domain), which would
    # turn a wrong type into a TypeError instead of the ValueError below.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
    print("DEBUG: Determining software for domain,path:", domain, path)
    software = None

    print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
    data = fetch_nodeinfo(domain, path)

    print("DEBUG: data[]:", type(data))
    if not isinstance(data, dict) or len(data) == 0:
        print("DEBUG: Could not determine software type:", domain)
        return fetch_generator_from_path(domain)

    print("DEBUG: data():", len(data), data)
    if "status" in data and data["status"] == "error" and "message" in data:
        print("WARNING: JSON response is an error:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        print("WARNING: JSON response contains only a message:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif not isinstance(data.get("software"), dict) or not isinstance(data["software"].get("name"), str):
        # Also covers a "software" entry that is no object and a name that is
        # no string; both come from a remote server and cannot be trusted.
        print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
        software = fetch_generator_from_path(domain)

        print(f"DEBUG: Generator for domain='{domain}' is: {software}, EXIT!")
        return software

    software = tidyup.domain(data["software"]["name"])

    print("DEBUG: sofware after tidyup.domain():", software)
    if software in ["akkoma", "rebased"]:
        print("DEBUG: Setting pleroma:", domain, software)
        software = "pleroma"
    elif software in ["hometown", "ecko"]:
        print("DEBUG: Setting mastodon:", domain, software)
        software = "mastodon"
    elif software in ["calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
        print("DEBUG: Setting misskey:", domain, software)
        software = "misskey"
    elif software.find("/") > 0:
        # Keeps the part after the last slash
        print("WARNING: Spliting of slash:", software)
        software = tidyup.domain(software.split("/")[-1])
    elif software.find("|") > 0:
        # Keeps the part before the first pipe
        print("WARNING: Spliting of pipe:", software)
        software = tidyup.domain(software.split("|")[0])
    elif "powered by" in software:
        print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = fba.strip_powered_by(software)
    elif isinstance(software, str) and " by " in software:
        print(f"DEBUG: software='{software}' has ' by ' in it")
        software = fba.strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        print(f"DEBUG: software='{software}' has ' see ' in it")
        software = fba.strip_until(software, " see ")

    print(f"DEBUG: software[]={type(software)}")
    if software == "":
        print("WARNING: tidyup.domain() left no software name behind:", domain)
        software = None

    print(f"DEBUG: software[]={type(software)}")
    if software is None:
        # Tested against None directly: str(None) is "None", never "", so a
        # comparison of str(software) with "" could not trigger the fallback.
        print(f"DEBUG: software for '{domain}' was not detected, trying generator ...")
        software = fetch_generator_from_path(domain)
    elif isinstance(software, str) and ("." in software or " " in software):
        print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = fba.remove_version(software)

    print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and "powered by" in software:
        print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = fba.remove_version(fba.strip_powered_by(software))

    print("DEBUG: Returning domain,software:", domain, software)
    return software
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract blocked domains and their reasons from the rows of an HTML table.

    The first <td> of a row holds the domain, the second one the reason. Rows
    without <td>, blacklisted domains and invalid domains are skipped.

    Args:
        tag: Element containing the <tr> rows.

    Returns:
        List of dicts with the keys "domain" and "reason".

    Raises:
        ValueError: If `tag` is no bs4.element.Tag.
        KeyError: If `tag` contains no <tr> at all.
    """
    print(f"DEBUG: tag[]={type(tag)} - CALLED!")
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")

    rows = tag.select("tr")
    if len(rows) == 0:
        raise KeyError("No table rows found in table!")

    domains = list()
    for element in rows:
        print(f"DEBUG: element[]={type(element)}")
        cells = element.find_all("td")
        if not cells:
            print("DEBUG: Skipping element, no <td> found")
            continue

        domain = tidyup.domain(cells[0].text)
        # A row with a single cell has no reason; indexing the second cell
        # unconditionally raised an IndexError and lost all remaining rows.
        # NOTE(review): None is assumed to be acceptable to callers as
        # "no reason given" - confirm.
        reason = tidyup.reason(cells[1].text) if len(cells) > 1 else None

        print(f"DEBUG: domain='{domain}',reason='{reason}'")

        if blacklist.is_blacklisted(domain):
            print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
            continue
        elif domain == "gab.com/.ai, develop.gab.com":
            # One known row lists three domains at once; split it by hand.
            # NOTE(review): "gab.com" and "gab.ai" are derived from the row
            # text above, only "develop.gab.com" is visible in the reviewed
            # text - confirm.
            print("DEBUG: Multiple domains detected in one row")
            for name in ("gab.com", "gab.ai", "develop.gab.com"):
                domains.append({
                    "domain": name,
                    "reason": reason,
                })
            continue
        elif not validators.domain(domain):
            print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
            continue

        print(f"DEBUG: Adding domain='{domain}' ...")
        domains.append({
            "domain": domain,
            "reason": reason,
        })

    print(f"DEBUG: domains()={len(domains)} - EXIT!")
    return domains
def add_peers(rows: dict) -> list:
    """Collect the peers listed under "linked", "allowed" and "blocked".

    Each peer is cleaned with tidyup.domain(); blacklisted peers are dropped.

    Args:
        rows: Mapping that may contain the three keys above, each holding an
            iterable of peers or None. None is accepted for the whole mapping
            and yields an empty list.

    Returns:
        List of cleaned, non-blacklisted peers.

    Raises:
        ValueError: If `rows` is neither a dict nor None.
    """
    # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!")
    if rows is None:
        # A null value has no peers; `key in None` would raise a TypeError.
        return list()
    elif not isinstance(rows, dict):
        raise ValueError(f"Parameter rows[]={type(rows)} is not 'dict'")

    peers = list()
    for key in ["linked", "allowed", "blocked"]:
        # DEBUG: print(f"DEBUG: Checking key='{key}'")
        if key in rows and rows[key] is not None:
            # DEBUG: print(f"DEBUG: Adding {len(rows[key])} peer(s) to peers list ...")
            for peer in rows[key]:
                if peer is None:
                    # Same guard as in fetch_instances(): tidyup.domain()
                    # cannot parse None
                    continue

                # DEBUG: print(f"DEBUG: peer='{peer}' - BEFORE!")
                peer = tidyup.domain(peer)

                # DEBUG: print(f"DEBUG: peer='{peer}' - AFTER!")
                if blacklist.is_blacklisted(peer):
                    # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted, skipped!")
                    continue

                # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...")
                peers.append(peer)

    # DEBUG: print(f"DEBUG: peers()={len(peers)} - EXIT!")
    return peers