1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
27 from urllib.parse import urlparse
29 from fba import blacklist
30 from fba import config
31 from fba import instances
32 from fba import network
34 from fba.federation import lemmy
35 from fba.federation import misskey
36 from fba.federation import peertube
# Pending errors, keyed by domain, that still need to be written to the database
# NOTE(review): the assignment itself was not visible in the reviewed copy; a dict
# is assumed because remove_pending_error() deletes entries by domain key - confirm.
pending_errors = {}

# "rel" identifiers (no real URLs)
nodeinfo_identifier = [
    "https://nodeinfo.diaspora.software/ns/schema/2.1",
    "https://nodeinfo.diaspora.software/ns/schema/2.0",
    "https://nodeinfo.diaspora.software/ns/schema/1.1",
    "https://nodeinfo.diaspora.software/ns/schema/1.0",
    "http://nodeinfo.diaspora.software/ns/schema/2.1",
    "http://nodeinfo.diaspora.software/ns/schema/2.0",
    "http://nodeinfo.diaspora.software/ns/schema/1.1",
    "http://nodeinfo.diaspora.software/ns/schema/1.0",
]

# Module-wide database handle used by the functions below
connection = sqlite3.connect("blocks.db")
cursor = connection.cursor()

# Pattern instances for version numbers, tried in this order by remove_version().
# Raw strings keep the regular expressions identical while avoiding Python's
# "invalid escape sequence" warning for "\." and "\d".
patterns = [
    # semantic version number (with optional v|V prefix)
    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"),
    # non-semantic, e.g. 1.2.3.4
    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(\.(?P<subpatch>0|[1-9]\d*))?)$"),
    # non-semantic, e.g. 2023.05[-dev]
    re.compile(r"^(?P<year>[1-9]{1}[0-9]{3})\.(?P<month>[0-9]{2})(-dev){0,1}$"),
    # non-semantic, e.g. abcdef0
    re.compile(r"^[a-f0-9]{7}$"),
]
70 ##### Other functions #####
def is_primitive(var: object) -> bool:
    """Tell whether var is None or exactly an int, str, float or bool.

    The exact type is compared (not isinstance()), so instances of subclasses
    of these types are not reported as primitive.
    """
    # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!")
    return var is None or type(var) in (int, str, float, bool)
76 def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
# Makes sure `domain` is registered (instances.is_registered() / instances.add()), fetches its peers
# through fetch_peers() and registers every peer that passes tidyup_domain(), validators.domain()
# and the blacklist check. When `software` is None it is determined via determine_software(domain, path).
# Raises ValueError when domain, origin, software or script have the wrong type.
# NOTE(review): this copy of the function has lost its indentation and several short lines
# (guards, early exits, the `try:` that belongs to the `except` below). The notes mark the
# places that matter - confirm each against the complete file before changing behaviour.
77 # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
78 if not isinstance(domain, str):
79 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
# NOTE(review): the condition guarding the next raise is not visible - presumably an empty-string check on domain.
81 raise ValueError("Parameter 'domain' is empty")
82 elif not isinstance(origin, str) and origin is not None:
83 raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
84 elif software is None:
# Being part of the elif chain, this branch means the software/script type checks below are
# not evaluated when software was None on entry.
85 print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
86 software = determine_software(domain, path)
87 print(f"DEBUG: Determined software='{software}' for domain='{domain}'")
88 elif not isinstance(software, str):
89 raise ValueError(f"Parameter software[]={type(software)} is not 'str'")
90 elif not isinstance(script, str):
91 raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
# NOTE(review): guard not visible; the message names 'domain' a second time although the
# preceding check is about script - looks like a copy/paste slip, verify.
93 raise ValueError("Parameter 'domain' is empty")
95 if not instances.is_registered(domain):
96 # DEBUG: print("DEBUG: Adding new domain:", domain, origin)
97 instances.add(domain, origin, script, path)
99 # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software)
100 peerlist = fetch_peers(domain, software)
102 if (peerlist is None):
103 print("ERROR: Cannot fetch peers:", domain)
# NOTE(review): whatever follows the error message (presumably an early return) is not visible here.
105 elif instances.has_pending_instance_data(domain):
106 # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...")
107 instances.update_data(domain)
109 print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
110 for instance in peerlist:
# NOTE(review): the None check announced by the next comment is not visible in this copy.
112 # Skip "None" types as tidup() cannot parse them
115 # DEBUG: print(f"DEBUG: instance='{instance}' - BEFORE")
116 instance = tidyup_domain(instance)
117 # DEBUG: print(f"DEBUG: instance='{instance}' - AFTER")
# NOTE(review): the condition for the empty-instance warning and the statements that end each
# rejected iteration (presumably `continue`) are not visible.
120 print("WARNING: Empty instance after tidyup_domain(), domain:", domain)
# Only the part before the first "/" is validated as a domain name.
122 elif not validators.domain(instance.split("/")[0]):
123 print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
125 elif blacklist.is_blacklisted(instance):
126 # DEBUG: print("DEBUG: instance is blacklisted:", instance)
129 # DEBUG: print("DEBUG: Handling instance:", instance)
# The peer is registered with `domain` as its origin and the same `script`.
131 if not instances.is_registered(instance):
132 # DEBUG: print("DEBUG: Adding new instance:", instance, domain)
133 instances.add(instance, domain, script)
# BaseException also covers KeyboardInterrupt and SystemExit; they are only printed here.
134 except BaseException as exception:
135 print(f"ERROR: instance='{instance}',exception[{type(exception)}]:'{str(exception)}'")
138 # DEBUG: print("DEBUG: EXIT!")
def add_peers(rows: dict) -> list:
    """Collect the peers listed under "linked", "allowed" and "blocked" in rows.

    Each entry is normalised with tidyup_domain(); entries for which
    blacklist.is_blacklisted() is true are left out. Keys that are missing or
    whose value is None are ignored, as are individual None entries (which
    tidyup_domain() cannot parse and would otherwise abort the whole run).

    Returns the list of remaining peers in the order they were found.
    """
    # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!")
    peers = list()
    for key in ["linked", "allowed", "blocked"]:
        # DEBUG: print(f"DEBUG: Checking key='{key}'")
        if key not in rows or rows[key] is None:
            continue

        # DEBUG: print(f"DEBUG: Adding {len(rows[key])} peer(s) to peers list ...")
        for peer in rows[key]:
            if peer is None:
                # Same rule as in fetch_instances(): None cannot be tidied up
                continue

            # DEBUG: print(f"DEBUG: peer='{peer}' - BEFORE!")
            peer = tidyup_domain(peer)

            # DEBUG: print(f"DEBUG: peer='{peer}' - AFTER!")
            if blacklist.is_blacklisted(peer):
                # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted, skipped!")
                continue

            # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...")
            peers.append(peer)

    # DEBUG: print(f"DEBUG: peers()={len(peers)} - EXIT!")
    return peers
def remove_version(software: str) -> str:
    """Strip a trailing version number from a software string.

    The string is first cut at the first ";", "," or " - ". The last token of
    the remainder (split by " ", "/" or "-") is then taken as version candidate
    and checked against the module-level `patterns`. Only when one pattern
    matches is the candidate (and its separator) removed; in every other case
    the software string is returned untouched.

    A leftover " version" suffix (e.g. "Foo version 1.2") is removed as well.
    """
    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    if "." not in software and " " not in software:
        print(f"WARNING: software='{software}' does not contain a version number.")
        return software

    # Drop everything after the first list-like separator
    temp = software
    if ";" in software:
        temp = software.split(";")[0]
    elif "," in software:
        temp = software.split(",")[0]
    elif " - " in software:
        temp = software.split(" - ")[0]
    temp = temp.strip()

    # DEBUG: print(f"DEBUG: software='{software}'")
    # The separator must be looked up in `temp`, the string that is actually
    # split - checking the uncut `software` can select a separator that is no
    # longer present and turn the whole remainder into the "version".
    if " " in temp:
        version = temp.split(" ")[-1]
    elif "/" in temp:
        version = temp.split("/")[-1]
    elif "-" in temp:
        version = temp.split("-")[-1]
    else:
        # DEBUG: print(f"DEBUG: Was not able to find common seperator, returning untouched software='{software}'")
        return software

    # DEBUG: print(f"DEBUG: Checking {len(patterns)} patterns ...")
    if not any(pattern.match(version) for pattern in patterns):
        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
        return software

    # DEBUG: print(f"DEBUG: Found valid version number: '{version}', removing it ...")
    # The extra -1 removes the single separator character in front of the version
    end = len(temp) - len(version) - 1

    # DEBUG: print(f"DEBUG: end[{type(end)}]={end}")
    software = temp[0:end].strip()
    if " version" in software:
        # DEBUG: print(f"DEBUG: software='{software}' contains word ' version'")
        software = strip_until(software, " version")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def strip_powered_by(software: str) -> str:
    """Return what follows "powered by " in software, cut at the first " - ".

    Raises ValueError when software is not a string or is empty. When the
    marker is not found, a warning is printed and software is returned as is.
    """
    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")

    # Search for exactly the marker whose length is skipped below. Testing for
    # "powered by" but searching "powered by " would slice from a bogus offset
    # whenever the trailing space is missing (find() returns -1).
    marker = "powered by "
    start = software.find(marker)
    # DEBUG: print(f"DEBUG: start[{type(start)}]='{start}'")
    if start < 0:
        print(f"WARNING: Cannot find 'powered by' in software='{software}'!")
        return software

    software = software[start + len(marker):].strip()
    # DEBUG: print(f"DEBUG: software='{software}'")

    # strip_until() rejects empty input and warns about a missing separator,
    # so it is only consulted when there is something to cut.
    if " - " in software:
        software = strip_until(software, " - ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def strip_hosted_on(software: str) -> str:
    """Return what precedes "hosted on " in software, cut at the first " - ".

    Raises ValueError when software is not a string or is empty. When the
    marker is not found, a warning is printed and software is returned as is.
    """
    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")

    end = software.find("hosted on ")
    # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
    if end < 0:
        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
        return software

    # A slice is needed here; indexing with the tuple (0, end) raises TypeError
    software = software[0:end].strip()
    # DEBUG: print(f"DEBUG: software='{software}'")

    # strip_until() rejects empty input and warns about a missing separator,
    # so it is only consulted when there is something to cut.
    if " - " in software:
        software = strip_until(software, " - ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def strip_until(software: str, until: str) -> str:
    """Return the part of software in front of the first occurrence of until.

    The result has surrounding whitespace removed. When until does not occur,
    a warning is printed and software is returned unchanged.

    Raises ValueError when either parameter is not a string or is empty.
    """
    # DEBUG: print(f"DEBUG: software='{software}',until='{until}' - CALLED!")
    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")
    elif not isinstance(until, str):
        raise ValueError(f"Parameter until[]='{type(until)}' is not 'str'")
    elif until == "":
        raise ValueError("Parameter 'until' is empty")
    elif until not in software:
        print(f"WARNING: Cannot find '{until}' in '{software}'!")
        return software

    # Next, strip until part
    end = software.find(until)

    # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
    software = software[0:end].strip()

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def remove_pending_error(domain: str):
    """Forget a pending error for domain, if one is recorded in pending_errors.

    Raises ValueError when domain is not a string or is empty. A domain that
    has no pending error is not treated as a failure.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Prevent updating any pending errors, nodeinfo was found
    # pop() with a default removes the entry without raising for unknown domains
    pending_errors.pop(domain, None)

    # DEBUG: print("DEBUG: EXIT!")
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoded domain.

    Raises ValueError when domain is not a string or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    return hashlib.sha256(domain.encode("utf-8")).hexdigest()
305 def log_error(domain: str, response: requests.models.Response):
# Writes one row into the error_log table for `domain` and afterwards deletes rows older than
# config.get("error_log_cleanup") seconds (compared against time.time()).
# Although annotated as a Response, the code also accepts an exception or a plain string:
# exceptions are converted with str(), and string errors are stored with the fixed code 999.
# Raises ValueError when domain is not a string.
# NOTE(review): in this copy the function has lost its indentation, the `try:` belonging to the
# `except` below, the `else:` between the two INSERTs and most of both parameter lists - the
# values bound to the placeholders cannot be confirmed from here.
306 # DEBUG: print("DEBUG: domain,response[]:", domain, type(response))
307 if not isinstance(domain, str):
308 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
# NOTE(review): the guard for the next raise is not visible - presumably an empty-string check.
310 raise ValueError("Parameter 'domain' is empty")
313 # DEBUG: print("DEBUG: BEFORE response[]:", type(response))
# json.decoder.JSONDecodeError is itself a BaseException subclass, so the second test adds nothing.
314 if isinstance(response, BaseException) or isinstance(response, json.decoder.JSONDecodeError):
315 response = str(response)
317 # DEBUG: print("DEBUG: AFTER response[]:", type(response))
318 if isinstance(response, str):
319 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[
325 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[
327 response.status_code,
332 # Cleanup old entries
333 # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})")
334 cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")])
335 except BaseException as exception:
336 print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
# NOTE(review): what happens after the error message is not visible in this copy.
339 # DEBUG: print("DEBUG: EXIT!")
341 def fetch_peers(domain: str, software: str) -> list:
# Fetches the peers of `domain`. misskey, lemmy and peertube are delegated to the fetch_peers()
# of their own modules. Any other software is queried at /api/v1/instance/peers and, when that
# response is not ok, at /api/v3/site, whose "federated_instances" entry is passed to add_peers().
# The number of peers is stored via instances.set("total_peers", ...) and
# instances.update_last_instance_fetch() is called before the result is (presumably) returned.
# Raises ValueError when domain or software have the wrong type.
# NOTE(review): this copy has lost its indentation and several short lines: the initialisation
# of `peers`, the `try:`, two `else:` branches, the bodies of the "returned a list" and
# "Querying API was successful" branches and the final return. Confirm against the full file.
342 # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software={software} - CALLED!")
343 if not isinstance(domain, str):
344 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
# NOTE(review): the guard for the next raise is not visible - presumably an empty-string check.
346 raise ValueError("Parameter 'domain' is empty")
347 elif not isinstance(software, str) and software is not None:
348 raise ValueError(f"software[]={type(software)} is not 'str'")
350 if software == "misskey":
351 # DEBUG: print(f"DEBUG: Invoking misskey.fetch_peers({domain}) ...")
352 return misskey.fetch_peers(domain)
353 elif software == "lemmy":
354 # DEBUG: print(f"DEBUG: Invoking lemmy.fetch_peers({domain}) ...")
355 return lemmy.fetch_peers(domain)
356 elif software == "peertube":
357 # DEBUG: print(f"DEBUG: Invoking peertube.fetch_peers({domain}) ...")
358 return peertube.fetch_peers(domain)
360 # DEBUG: print(f"DEBUG: Fetching peers from '{domain}',software='{software}' ...")
# The timeout tuple is (connection_timeout, read_timeout) taken from the configuration.
363 response = network.fetch_response(domain, "/api/v1/instance/peers", network.api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
365 data = json_from_response(response)
367 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
368 if not response.ok or response.status_code >= 400:
369 # DEBUG: print(f"DEBUG: Was not able to fetch peers, trying alternative ...")
370 response = network.fetch_response(domain, "/api/v3/site", network.api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
372 data = json_from_response(response)
373 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
374 if not response.ok or response.status_code >= 400:
375 print("WARNING: Could not reach any JSON API:", domain)
376 instances.update_last_error(domain, response)
377 elif response.ok and isinstance(data, list):
378 # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
# NOTE(review): the statement executed for a list response is not visible here.
380 elif "federated_instances" in data:
381 # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
382 peers = peers + add_peers(data["federated_instances"])
383 # DEBUG: print("DEBUG: Added instance(s) to peers")
385 print("WARNING: JSON response does not contain 'federated_instances':", domain)
386 instances.update_last_error(domain, response)
388 # DEBUG: print("DEBUG: Querying API was successful:", domain, len(data))
# NOTE(review): how `data` becomes the result on success is not visible here.
# BaseException also covers KeyboardInterrupt and SystemExit; they are recorded as the last error.
391 except BaseException as exception:
392 print("WARNING: Some error during get():", domain, exception)
393 instances.update_last_error(domain, exception)
395 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
396 instances.set("total_peers", domain, len(peers))
398 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
399 instances.update_last_instance_fetch(domain)
401 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
404 def fetch_nodeinfo(domain: str, path: str = None) -> list:
# Looks up nodeinfo for `domain`: fetch_wellknown_nodeinfo() is tried first, and when it yields
# nothing a list of static request paths (e.g. /nodeinfo/2.1.json, /nodeinfo/2.0.json) is tried.
# A dict response from a static path records detection_mode "STATIC_CHECK" and the path as
# nodeinfo_url; failures are recorded through instances.update_last_error().
# Raises ValueError when domain or path have the wrong type.
# NOTE(review): this copy has lost its indentation and several short lines: the early return for
# the well-known result, most entries of the request path list, the `try:`, the statements that
# leave or continue the loop and the final return. Confirm against the full file.
405 # DEBUG: print(f"DEBUG: domain='{domain}',path={path} - CALLED!")
406 if not isinstance(domain, str):
407 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
# NOTE(review): the guard for the next raise is not visible - presumably an empty-string check.
409 raise ValueError("Parameter 'domain' is empty")
410 elif not isinstance(path, str) and path is not None:
411 raise ValueError(f"Parameter path[]={type(path)} is not 'str'")
413 # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
414 nodeinfo = fetch_wellknown_nodeinfo(domain)
416 # DEBUG: print(f"DEBUG: nodeinfo({len(nodeinfo)})={nodeinfo}")
417 if len(nodeinfo) > 0:
418 # DEBUG: print("DEBUG: nodeinfo()={len(nodeinfo))} - EXIT!")
422 "/nodeinfo/2.1.json",
424 "/nodeinfo/2.0.json",
431 for request in request_paths:
# NOTE(review): `path != f"https://{domain}{path}"` can never be false - a string never equals
# itself behind a non-empty prefix - so this branch is taken for every non-empty path. The
# DEBUG text below talks about `request`; presumably a comparison with `request` was meant.
432 if path is not None and path != "" and path != f"https://{domain}{path}":
433 # DEBUG: print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
437 # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
# The nodeinfo-specific timeouts from the configuration are used here.
438 response = network.fetch_response(domain, request, network.api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
440 data = json_from_response(response)
441 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
442 if response.ok and isinstance(data, dict):
443 # DEBUG: print("DEBUG: Success:", request)
444 instances.set("detection_mode", domain, "STATIC_CHECK")
445 instances.set("nodeinfo_url" , domain, request)
447 elif response.ok and isinstance(data, list):
448 print(f"UNSUPPORTED: domain='{domain}' returned a list: '{data}'")
# NOTE(review): what follows the UNSUPPORTED message is not visible here.
450 elif not response.ok or response.status_code >= 400:
451 print("WARNING: Failed fetching nodeinfo from domain:", domain)
452 instances.update_last_error(domain, response)
# BaseException also covers KeyboardInterrupt and SystemExit; they are recorded as the last error.
455 except BaseException as exception:
456 # DEBUG: print("DEBUG: Cannot fetch API request:", request)
457 instances.update_last_error(domain, exception)
460 # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!")
463 def fetch_wellknown_nodeinfo(domain: str) -> list:
# Requests /.well-known/nodeinfo from `domain` and, for links whose "rel" is listed in
# nodeinfo_identifier, fetches the linked document with fetch_url(). A dict result records
# detection_mode "AUTO_DISCOVERY" and the link's href as nodeinfo_url.
# Raises ValueError when domain is not a string.
# NOTE(review): this copy has lost its indentation and several short lines: the initial value of
# `data`, the `try:`, the assignment that creates `nodeinfo`, the `else:` lines in front of the
# two warnings, any loop exit after a successful fetch and the final return. Confirm against
# the full file.
464 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
465 if not isinstance(domain, str):
466 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
# NOTE(review): the guard for the next raise is not visible - presumably an empty-string check.
468 raise ValueError("Parameter 'domain' is empty")
470 # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain)
# The nodeinfo-specific timeouts from the configuration are used for the discovery document.
474 response = network.fetch_response(domain, "/.well-known/nodeinfo", network.api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
476 data = json_from_response(response)
477 # DEBUG: print("DEBUG: domain,response.ok,data[]:", domain, response.ok, type(data))
478 if response.ok and isinstance(data, dict):
480 # DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain)
481 if "links" in nodeinfo:
482 # DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"]))
483 for link in nodeinfo["links"]:
484 # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
485 if link["rel"] in nodeinfo_identifier:
486 # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
# The linked document is fetched with the general timeouts, not the nodeinfo-specific ones.
# `response` and `data` are overwritten by the linked document from here on.
487 response = fetch_url(link["href"], network.api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
489 data = json_from_response(response)
490 # DEBUG: print("DEBUG: href,response.ok,response.status_code:", link["href"], response.ok, response.status_code)
491 if response.ok and isinstance(data, dict):
492 # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data))
493 instances.set("detection_mode", domain, "AUTO_DISCOVERY")
494 instances.set("nodeinfo_url" , domain, link["href"])
497 print("WARNING: Unknown 'rel' value:", domain, link["rel"])
499 print("WARNING: nodeinfo does not contain 'links':", domain)
# BaseException also covers KeyboardInterrupt and SystemExit; they are recorded as the last error.
501 except BaseException as exception:
502 print("WARNING: Failed fetching .well-known info:", domain)
503 instances.update_last_error(domain, exception)
506 # DEBUG: print("DEBUG: Returning data[]:", type(data))
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Guess the software of domain from the HTML page at path.

    The page is searched for <meta name="generator"> first and
    <meta property="og:site_name"> second. The content of the first tag found
    is tidied up, the detection mode ("GENERATOR" or "SITE_NAME") is stored
    via instances.set() and any pending error for the domain is removed.
    Version numbers and "powered by"/"hosted on"/" by "/" see " decorations
    are stripped from the result.

    Returns the software name, or None when nothing usable was found. Errors
    while fetching or parsing are recorded with instances.update_last_error()
    instead of being raised.

    Raises ValueError when domain or path are not non-empty strings.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!")
    software = None

    try:
        # DEBUG: print(f"DEBUG: Fetching path='{path}' from '{domain}' ...")
        response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print("DEBUG: domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text))
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print("DEBUG: Search for <meta name='generator'>:", domain)
            doc = bs4.BeautifulSoup(response.text, "html.parser")

            # DEBUG: print("DEBUG: doc[]:", type(doc))
            generator = doc.find("meta", {"name": "generator"})
            site_name = doc.find("meta", {"property": "og:site_name"})

            # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
            if isinstance(generator, bs4.element.Tag):
                # DEBUG: print("DEBUG: Found generator meta tag:", domain)
                software = tidyup_domain(generator.get("content"))
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set("detection_mode", domain, "GENERATOR")
                remove_pending_error(domain)
            elif isinstance(site_name, bs4.element.Tag):
                # DEBUG: print("DEBUG: Found property=og:site_name:", domain)
                # The result must land in `software`; a misspelt target name
                # would silently discard the detected site name.
                software = tidyup_domain(site_name.get("content"))
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set("detection_mode", domain, "SITE_NAME")
                remove_pending_error(domain)

    except BaseException as exception:
        # Best effort: the failure is recorded and None/partial result is used
        # DEBUG: print(f"DEBUG: Cannot fetch / from '{domain}':", exception)
        instances.update_last_error(domain, exception)

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and software == "":
        # DEBUG: print(f"DEBUG: Corrected empty string to None for software of domain='{domain}'")
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = remove_version(software)

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and " powered by " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = remove_version(strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'hosted on' in it")
        software = remove_version(strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
        software = strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
        software = strip_until(software, " see ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine which software runs on domain.

    Nodeinfo is fetched through fetch_nodeinfo(domain, path). When it is
    missing, is an error/message-only response or lacks [software][name], the
    result of fetch_generator_from_path(domain) is returned instead.

    Otherwise the reported name is tidied up, known forks are mapped to their
    upstream name (pleroma, mastodon, misskey) and version numbers or
    "powered by"/" by "/" see " decorations are stripped.

    Returns the software name, or whatever fetch_generator_from_path()
    returns when that fallback is used (which may be None).

    Raises ValueError when domain or path have the wrong type or domain is
    empty.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    # DEBUG: print("DEBUG: Determining software for domain,path:", domain, path)
    # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
    data = fetch_nodeinfo(domain, path)

    # DEBUG: print("DEBUG: data[]:", type(data))
    if not isinstance(data, dict) or len(data) == 0:
        # DEBUG: print("DEBUG: Could not determine software type:", domain)
        return fetch_generator_from_path(domain)

    # DEBUG: print("DEBUG: data():", len(data), data)
    if "status" in data and data["status"] == "error" and "message" in data:
        print("WARNING: JSON response is an error:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        print("WARNING: JSON response contains only a message:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "software" not in data or "name" not in data["software"]:
        # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
        software = fetch_generator_from_path(domain)

        # DEBUG: print(f"DEBUG: Generator for domain='{domain}' is: {software}, EXIT!")
        return software

    software = tidyup_domain(data["software"]["name"])

    # DEBUG: print("DEBUG: sofware after tidyup_domain():", software)
    if software in ["akkoma", "rebased"]:
        # DEBUG: print("DEBUG: Setting pleroma:", domain, software)
        software = "pleroma"
    elif software in ["hometown", "ecko"]:
        # DEBUG: print("DEBUG: Setting mastodon:", domain, software)
        software = "mastodon"
    elif software in ["calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
        # DEBUG: print("DEBUG: Setting misskey:", domain, software)
        software = "misskey"
    elif software.find("/") > 0:
        print("WARNING: Spliting of slash:", software)
        software = tidyup_domain(software.split("/")[-1])
    elif software.find("|") > 0:
        print("WARNING: Spliting of pipe:", software)
        software = tidyup_domain(software.split("|")[0])
    elif "powered by" in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = strip_powered_by(software)
    elif " by " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
        software = strip_until(software, " by ")
    elif " see " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
        software = strip_until(software, " see ")

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if software == "":
        # Nothing usable was reported: fall back to the generator lookup.
        # Membership tests such as `"." in software` must never run on a
        # non-string value, so this case is handled before them.
        print("WARNING: tidyup_domain() left no software name behind:", domain)
        # DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...")
        software = fetch_generator_from_path(domain)
    elif "." in software or " " in software:
        # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = remove_version(software)

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and "powered by" in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = remove_version(strip_powered_by(software))

    # DEBUG: print("DEBUG: Returning domain,software:", domain, software)
    return software
def tidyup_reason(reason: str) -> str:
    """Return reason without surrounding whitespace and with "â" turned into a double quote.

    Raises ValueError when reason is not a string.
    """
    # DEBUG: print(f"DEBUG: reason='{reason}' - CALLED!")
    if not isinstance(reason, str):
        raise ValueError(f"Parameter reason[]={type(reason)} is not 'str'")

    # Strip string
    reason = reason.strip()

    # Replace "â" with a plain double quote
    # NOTE(review): "â" looks like the visible remainder of a mis-decoded
    # typographic quote - confirm the intended character sequence.
    reason = re.sub("â", "\"", reason)

    # DEBUG: print(f"DEBUG: reason='{reason}' - EXIT!")
    return reason
def tidyup_domain(domain: str) -> str:
    """Normalise a domain-like string.

    The value is lower-cased and stripped, then a trailing dot, a trailing
    port number, a leading http(s) scheme, a trailing slash, a leading "@"
    and any "user@" prefix are removed. Finally everything from "/profile/"
    or "/users/" onwards is cut off.

    Raises ValueError when domain is not a string.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")

    # All lower-case and strip spaces out + last dot
    domain = domain.lower().strip().rstrip(".")

    # No port number
    domain = re.sub(r"\:\d+$", "", domain)

    # No protocol, sometimes without the slashes
    domain = re.sub(r"^https?\:(\/*)", "", domain)

    # No trailing slash
    domain = re.sub(r"\/$", "", domain)

    # No @ sign
    domain = re.sub(r"^\@", "", domain)

    # No individual users in block lists
    domain = re.sub(r"(.+)\@", "", domain)

    # str.find() returns -1 (truthy) when nothing is found, so it cannot be
    # used as a condition - test for membership instead.
    if "/profile/" in domain:
        domain = domain.split("/profile/")[0]
    elif "/users/" in domain:
        domain = domain.split("/users/")[0]

    # DEBUG: print(f"DEBUG: domain='{domain}' - EXIT!")
    return domain
def json_from_response(response: requests.models.Response) -> list:
    """Decode the JSON body of response.

    Returns whatever response.json() yields (a dict or a list), or an empty
    list when the body is blank or is not valid JSON.

    Raises ValueError when response is not a requests Response.
    """
    # DEBUG: print(f"DEBUG: response[]={type(response)} - CALLED!")
    if not isinstance(response, requests.models.Response):
        raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")

    data = list()
    if response.text.strip() != "":
        # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...")
        try:
            data = response.json()
        except ValueError:
            # Deliberately best effort: an undecodable body counts as "no data".
            # ValueError is the common base of json.JSONDecodeError and of the
            # simplejson error requests raises when simplejson is installed.
            pass

    # DEBUG: print(f"DEBUG: data[]={type(data)} - EXIT!")
    return data
def has_key(lists: list, key: str, value: object) -> bool:
    """Tell whether any dict in lists has value stored under key.

    Rows are checked in order and the search stops at the first match, so
    rows behind a match are not validated.

    Raises ValueError when lists is not a list, key is not a non-empty string
    or a visited row is not a dict, and KeyError when a visited row lacks key.
    """
    # DEBUG: print(f"DEBUG: lists()={len(lists)},key='{key}',value[]='{type(value)}' - CALLED!")
    if not isinstance(lists, list):
        raise ValueError(f"Parameter lists[]='{type(lists)}' is not 'list'")
    elif not isinstance(key, str):
        raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'")
    elif key == "":
        raise ValueError("Parameter 'key' is empty")

    has = False
    # DEBUG: print(f"DEBUG: Checking lists()={len(lists)} ...")
    for row in lists:
        # DEBUG: print(f"DEBUG: row['{type(row)}']={row}")
        if not isinstance(row, dict):
            raise ValueError(f"row[]='{type(row)}' is not 'dict'")
        elif key not in row:
            raise KeyError(f"Cannot find key='{key}'")
        elif row[key] == value:
            has = True
            break

    # DEBUG: print(f"DEBUG: has={has} - EXIT!")
    return has
749 def find_domains(tag: bs4.element.Tag) -> list:
# Walks over the <tr> rows of `tag`; for every row that has a <td>, the text of the first cell
# is tidied up as domain and the text of the second cell as reason. Blacklisted and invalid
# domains are reported with a warning.
# Raises ValueError when tag is not a bs4 Tag and KeyError when it contains no <tr> at all.
# NOTE(review): this copy has lost its indentation and several short lines: the initialisation
# of the result list, the statements that skip a row, the dictionaries appended for accepted
# rows (only the key "domain": "develop.gab.com" of the special case survived) and the final
# return. The structure of the returned rows cannot be confirmed from here.
750 # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!")
751 if not isinstance(tag, bs4.element.Tag):
752 raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")
753 elif len(tag.select("tr")) == 0:
754 raise KeyError("No table rows found in table!")
757 for element in tag.select("tr"):
758 # DEBUG: print(f"DEBUG: element[]={type(element)}")
759 if not element.find("td"):
760 # DEBUG: print("DEBUG: Skipping element, no <td> found")
763 domain = tidyup_domain(element.find("td").text)
# A row with a single <td> raises IndexError here, because index 1 is read unconditionally.
764 reason = tidyup_reason(element.findAll("td")[1].text)
766 # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
768 if blacklist.is_blacklisted(domain):
769 print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
# Hard-coded special case for one known cell that lists several domains at once.
771 elif domain == "gab.com/.ai, develop.gab.com":
772 # DEBUG: print(f"DEBUG: Multiple domains detected in one row")
782 "domain": "develop.gab.com",
786 elif not validators.domain(domain):
787 print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
790 # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
796 # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Response:
    """Fetch an absolute URL through network.fetch_response().

    The URL is split with urlparse(); its host name is passed as the domain
    and its path - plus the query string when there is one - as the request
    path. Scheme, port and fragment of the URL are not passed on.

    Raises ValueError when url is not a non-empty string, headers is not a
    dict or timeout is not a tuple.
    """
    # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!")
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")
    elif not isinstance(headers, dict):
        raise ValueError(f"Parameter headers[]='{type(headers)}' is not 'dict'")
    elif not isinstance(timeout, tuple):
        raise ValueError(f"Parameter timeout[]='{type(timeout)}' is not 'tuple'")

    # DEBUG: print(f"DEBUG: Parsing url='{url}'")
    components = urlparse(url)

    # Invoke other function, avoid trailing ?
    # DEBUG: print(f"DEBUG: components[{type(components)}]={components}")
    request_path = components.path
    if components.query != "":
        request_path = f"{components.path}?{components.query}"

    response = network.fetch_response(components.hostname, request_path, headers, timeout)

    # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXIT!")
    return response