1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 from fba import blacklist
22 from fba import config
24 from fba import instances
25 from fba import network
27 from fba.helpers import tidyup
29 from fba.networks import lemmy
30 from fba.networks import misskey
31 from fba.networks import peertube
33 # "rel" identifiers (no real URLs)
34 nodeinfo_identifier = [
35 "https://nodeinfo.diaspora.software/ns/schema/2.1",
36 "https://nodeinfo.diaspora.software/ns/schema/2.0",
37 "https://nodeinfo.diaspora.software/ns/schema/1.1",
38 "https://nodeinfo.diaspora.software/ns/schema/1.0",
39 "http://nodeinfo.diaspora.software/ns/schema/2.1",
40 "http://nodeinfo.diaspora.software/ns/schema/2.0",
41 "http://nodeinfo.diaspora.software/ns/schema/1.1",
42 "http://nodeinfo.diaspora.software/ns/schema/1.0",
45 def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
46 # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
47 if not isinstance(domain, str):
48 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
50 raise ValueError("Parameter 'domain' is empty")
51 elif not isinstance(origin, str) and origin is not None:
52 raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
53 elif software is None:
54 # DEBUG: print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
55 software = determine_software(domain, path)
56 # DEBUG: print(f"DEBUG: Determined software='{software}' for domain='{domain}'")
57 elif not isinstance(software, str):
58 raise ValueError(f"Parameter software[]={type(software)} is not 'str'")
59 elif not isinstance(script, str):
60 raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
62 raise ValueError("Parameter 'domain' is empty")
64 if not instances.is_registered(domain):
65 # DEBUG: print("DEBUG: Adding new domain:", domain, origin)
66 instances.add(domain, origin, script, path)
68 # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software)
69 peerlist = fetch_peers(domain, software)
72 print("ERROR: Cannot fetch peers:", domain)
74 elif instances.has_pending_instance_data(domain):
75 # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...")
76 instances.update_data(domain)
78 print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
79 for instance in peerlist:
81 # Skip "None" types as tidup.domain() cannot parse them
84 # DEBUG: print(f"DEBUG: instance='{instance}' - BEFORE")
85 instance = tidyup.domain(instance)
86 # DEBUG: print(f"DEBUG: instance='{instance}' - AFTER")
89 print("WARNING: Empty instance after tidyup.domain(), domain:", domain)
91 elif not validators.domain(instance.split("/")[0]):
92 print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
94 elif blacklist.is_blacklisted(instance):
95 # DEBUG: print("DEBUG: instance is blacklisted:", instance)
98 # DEBUG: print("DEBUG: Handling instance:", instance)
99 if not instances.is_registered(instance):
100 # DEBUG: print("DEBUG: Adding new instance:", instance, domain)
101 instances.add(instance, domain, script)
103 # DEBUG: print("DEBUG: EXIT!")
105 def fetch_peers(domain: str, software: str) -> list:
106 # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software={software} - CALLED!")
107 if not isinstance(domain, str):
108 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
110 raise ValueError("Parameter 'domain' is empty")
111 elif not isinstance(software, str) and software is not None:
112 raise ValueError(f"software[]={type(software)} is not 'str'")
114 if software == "misskey":
115 # DEBUG: print(f"DEBUG: Invoking misskey.fetch_peers({domain}) ...")
116 return misskey.fetch_peers(domain)
117 elif software == "lemmy":
118 # DEBUG: print(f"DEBUG: Invoking lemmy.fetch_peers({domain}) ...")
119 return lemmy.fetch_peers(domain)
120 elif software == "peertube":
121 # DEBUG: print(f"DEBUG: Invoking peertube.fetch_peers({domain}) ...")
122 return peertube.fetch_peers(domain)
124 # DEBUG: print(f"DEBUG: Fetching peers from '{domain}',software='{software}' ...")
125 data = network.get_json_api(
127 "/api/v1/instance/peers",
128 (config.get("connection_timeout"), config.get("read_timeout"))
130 # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
132 if "error_message" in data:
133 # DEBUG: print("DEBUG: Was not able to fetch peers, trying alternative ...")
134 data = network.get_json_api(
137 (config.get("connection_timeout"), config.get("read_timeout"))
140 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
141 if "error_message" in data:
142 print(f"WARNING: Could not reach any JSON API at domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")
143 elif "federated_instances" in data["json"]:
144 # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
145 peers = peers + add_peers(data["json"]["federated_instances"])
146 # DEBUG: print("DEBUG: Added instance(s) to peers")
148 print("WARNING: JSON response does not contain 'federated_instances':", domain)
149 instances.update_last_error(domain, response)
151 # DEBUG: print("DEBUG: Querying API was successful:", domain, len(data))
154 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
155 instances.set_data("total_peers", domain, len(peers))
157 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
158 instances.update_last_instance_fetch(domain)
160 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
163 def fetch_nodeinfo(domain: str, path: str = None) -> list:
164 # DEBUG: print(f"DEBUG: domain='{domain}',path={path} - CALLED!")
165 if not isinstance(domain, str):
166 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
168 raise ValueError("Parameter 'domain' is empty")
169 elif not isinstance(path, str) and path is not None:
170 raise ValueError(f"Parameter path[]={type(path)} is not 'str'")
172 # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
173 nodeinfo = fetch_wellknown_nodeinfo(domain)
175 # DEBUG: print(f"DEBUG: nodeinfo({len(nodeinfo)})={nodeinfo}")
176 if len(nodeinfo) > 0:
177 # DEBUG: print("DEBUG: nodeinfo()={len(nodeinfo))} - EXIT!")
181 "/nodeinfo/2.1.json",
183 "/nodeinfo/2.0.json",
189 for request in request_paths:
190 if path is not None and path != "" and path != request:
191 # DEBUG: print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
194 # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
195 data = network.get_json_api(
198 (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
201 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
202 if "error_message" not in data:
203 # DEBUG: print("DEBUG: Success:", request)
204 instances.set_data("detection_mode", domain, "STATIC_CHECK")
205 instances.set_data("nodeinfo_url" , domain, request)
208 print(f"WARNING: Failed fetching nodeinfo from domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")
210 # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!")
213 def fetch_wellknown_nodeinfo(domain: str) -> list:
214 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
215 if not isinstance(domain, str):
216 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
218 raise ValueError("Parameter 'domain' is empty")
220 # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain)
221 data = network.get_json_api(
223 "/.well-known/nodeinfo",
224 (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
227 if "error_message" not in data:
228 nodeinfo = data["json"]
229 # DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain)
230 if "links" in nodeinfo:
231 # DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"]))
232 for link in nodeinfo["links"]:
233 # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
234 if link["rel"] in nodeinfo_identifier:
235 # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
236 data = network.fetch_api_url(
238 (config.get("connection_timeout"), config.get("read_timeout"))
241 # DEBUG: print("DEBUG: href,data[]:", link["href"], type(data))
243 # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data))
244 instances.set_data("detection_mode", domain, "AUTO_DISCOVERY")
245 instances.set_data("nodeinfo_url" , domain, link["href"])
248 instances.update_last_error(domain, data)
250 print("WARNING: Unknown 'rel' value:", domain, link["rel"])
252 print("WARNING: nodeinfo does not contain 'links':", domain)
254 # DEBUG: print("DEBUG: Returning data[]:", type(data))
257 def fetch_generator_from_path(domain: str, path: str = "/") -> str:
258 # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
259 if not isinstance(domain, str):
260 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
262 raise ValueError("Parameter 'domain' is empty")
263 elif not isinstance(path, str):
264 raise ValueError(f"path[]={type(path)} is not 'str'")
266 raise ValueError("Parameter 'path' is empty")
268 # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!")
271 # DEBUG: print(f"DEBUG: Fetching path='{path}' from '{domain}' ...")
272 response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
274 # DEBUG: print("DEBUG: domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text))
275 if response.ok and response.status_code < 300 and len(response.text) > 0:
276 # DEBUG: print("DEBUG: Search for <meta name='generator'>:", domain)
277 doc = bs4.BeautifulSoup(response.text, "html.parser")
279 # DEBUG: print("DEBUG: doc[]:", type(doc))
280 generator = doc.find("meta", {"name" : "generator"})
281 site_name = doc.find("meta", {"property": "og:site_name"})
283 # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
284 if isinstance(generator, bs4.element.Tag):
285 # DEBUG: print("DEBUG: Found generator meta tag:", domain)
286 software = tidyup.domain(generator.get("content"))
287 print(f"INFO: domain='{domain}' is generated by '{software}'")
288 instances.set_data("detection_mode", domain, "GENERATOR")
289 elif isinstance(site_name, bs4.element.Tag):
290 # DEBUG: print("DEBUG: Found property=og:site_name:", domain)
291 sofware = tidyup.domain(site_name.get("content"))
292 print(f"INFO: domain='{domain}' has og:site_name='{software}'")
293 instances.set_data("detection_mode", domain, "SITE_NAME")
295 # DEBUG: print(f"DEBUG: software[]={type(software)}")
296 if isinstance(software, str) and software == "":
297 # DEBUG: print(f"DEBUG: Corrected empty string to None for software of domain='{domain}'")
299 elif isinstance(software, str) and ("." in software or " " in software):
300 # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
301 software = fba.remove_version(software)
303 # DEBUG: print(f"DEBUG: software[]={type(software)}")
304 if isinstance(software, str) and " powered by " in software:
305 # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
306 software = fba.remove_version(fba.strip_powered_by(software))
307 elif isinstance(software, str) and " hosted on " in software:
308 # DEBUG: print(f"DEBUG: software='{software}' has 'hosted on' in it")
309 software = fba.remove_version(fba.strip_hosted_on(software))
310 elif isinstance(software, str) and " by " in software:
311 # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
312 software = fba.strip_until(software, " by ")
313 elif isinstance(software, str) and " see " in software:
314 # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
315 software = fba.strip_until(software, " see ")
317 # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
320 def determine_software(domain: str, path: str = None) -> str:
321 # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
322 if not isinstance(domain, str):
323 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
325 raise ValueError("Parameter 'domain' is empty")
326 elif not isinstance(path, str) and path is not None:
327 raise ValueError(f"Parameter path[]={type(path)} is not 'str'")
329 # DEBUG: print("DEBUG: Determining software for domain,path:", domain, path)
332 # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
333 data = fetch_nodeinfo(domain, path)
335 # DEBUG: print("DEBUG: data[]:", type(data))
336 if not isinstance(data, dict) or len(data) == 0:
337 # DEBUG: print("DEBUG: Could not determine software type:", domain)
338 return fetch_generator_from_path(domain)
340 # DEBUG: print("DEBUG: data():", len(data), data)
341 if "status" in data and data["status"] == "error" and "message" in data:
342 print("WARNING: JSON response is an error:", data["message"])
343 instances.update_last_error(domain, data["message"])
344 return fetch_generator_from_path(domain)
345 elif "message" in data:
346 print("WARNING: JSON response contains only a message:", data["message"])
347 instances.update_last_error(domain, data["message"])
348 return fetch_generator_from_path(domain)
349 elif "software" not in data or "name" not in data["software"]:
350 # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
351 software = fetch_generator_from_path(domain)
353 # DEBUG: print(f"DEBUG: Generator for domain='{domain}' is: {software}, EXIT!")
356 software = tidyup.domain(data["software"]["name"])
358 # DEBUG: print("DEBUG: sofware after tidyup.domain():", software)
359 if software in ["akkoma", "rebased"]:
360 # DEBUG: print("DEBUG: Setting pleroma:", domain, software)
362 elif software in ["hometown", "ecko"]:
363 # DEBUG: print("DEBUG: Setting mastodon:", domain, software)
364 software = "mastodon"
365 elif software in ["calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
366 # DEBUG: print("DEBUG: Setting misskey:", domain, software)
368 elif software.find("/") > 0:
369 print("WARNING: Spliting of slash:", software)
370 software = tidyup.domain(software.split("/")[-1])
371 elif software.find("|") > 0:
372 print("WARNING: Spliting of pipe:", software)
373 software = tidyup.domain(software.split("|")[0])
374 elif "powered by" in software:
375 # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
376 software = fba.strip_powered_by(software)
377 elif isinstance(software, str) and " by " in software:
378 # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
379 software = fba.strip_until(software, " by ")
380 elif isinstance(software, str) and " see " in software:
381 # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
382 software = fba.strip_until(software, " see ")
384 # DEBUG: print(f"DEBUG: software[]={type(software)}")
386 print("WARNING: tidyup.domain() left no software name behind:", domain)
389 # DEBUG: print(f"DEBUG: software[]={type(software)}")
390 if str(software) == "":
391 # DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...")
392 software = fetch_generator_from_path(domain)
393 elif len(str(software)) > 0 and ("." in software or " " in software):
394 # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
395 software = fba.remove_version(software)
397 # DEBUG: print(f"DEBUG: software[]={type(software)}")
398 if isinstance(software, str) and "powered by" in software:
399 # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
400 software = fba.remove_version(fba.strip_powered_by(software))
402 # DEBUG: print("DEBUG: Returning domain,software:", domain, software)
405 def find_domains(tag: bs4.element.Tag) -> list:
406 # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!")
407 if not isinstance(tag, bs4.element.Tag):
408 raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")
409 elif len(tag.select("tr")) == 0:
410 raise KeyError("No table rows found in table!")
413 for element in tag.select("tr"):
414 # DEBUG: print(f"DEBUG: element[]={type(element)}")
415 if not element.find("td"):
416 # DEBUG: print("DEBUG: Skipping element, no <td> found")
419 domain = tidyup.domain(element.find("td").text)
420 reason = tidyup.reason(element.findAll("td")[1].text)
422 # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
424 if blacklist.is_blacklisted(domain):
425 print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
427 elif domain == "gab.com/.ai, develop.gab.com":
428 # DEBUG: print("DEBUG: Multiple domains detected in one row")
438 "domain": "develop.gab.com",
442 elif not validators.domain(domain):
443 print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
446 # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
452 # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
455 def add_peers(rows: dict) -> list:
456 # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!")
458 for key in ["linked", "allowed", "blocked"]:
459 # DEBUG: print(f"DEBUG: Checking key='{key}'")
460 if key in rows and rows[key] is not None:
461 # DEBUG: print(f"DEBUG: Adding {len(rows[key])} peer(s) to peers list ...")
462 for peer in rows[key]:
463 # DEBUG: print(f"DEBUG: peer='{peer}' - BEFORE!")
464 peer = tidyup.domain(peer)
466 # DEBUG: print(f"DEBUG: peer='{peer}' - AFTER!")
467 if blacklist.is_blacklisted(peer):
468 # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted, skipped!")
471 # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...")
474 # DEBUG: print(f"DEBUG: peers()={len(peers)} - EXIT!")