1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from urllib.parse import urlparse
29 from fba import blacklist
30 from fba import config
31 from fba import instances
32 from fba import network
34 from fba.federation import lemmy
35 from fba.federation import misskey
36 from fba.federation import peertube
38 # Array with pending errors needed to be written to database
# NOTE(review): the assignment this comment describes is not visible in this view;
# remove_pending_error() deletes keys from a module-level 'pending_errors'.
42 # "rel" identifiers (no real URLs)
# fetch_wellknown_nodeinfo() only follows a link whose "rel" value is listed here.
43 nodeinfo_identifier = [
44 "https://nodeinfo.diaspora.software/ns/schema/2.1",
45 "https://nodeinfo.diaspora.software/ns/schema/2.0",
46 "https://nodeinfo.diaspora.software/ns/schema/1.1",
47 "https://nodeinfo.diaspora.software/ns/schema/1.0",
48 "http://nodeinfo.diaspora.software/ns/schema/2.1",
49 "http://nodeinfo.diaspora.software/ns/schema/2.0",
50 "http://nodeinfo.diaspora.software/ns/schema/1.1",
51 "http://nodeinfo.diaspora.software/ns/schema/1.0",
# Module-wide SQLite connection and cursor; log_error() writes through this cursor.
55 connection = sqlite3.connect("blocks.db")
56 cursor = connection.cursor()
58 # Pattern instances for version numbers
# remove_version() iterates a module-level 'patterns' sequence; presumably these
# compiled expressions are its elements (the enclosing list is not visible here).
60 # semantic version number (with optional v|V prefix)
61 re.compile("^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"),
62 # non-semantic, e.g. 1.2.3.4
63 re.compile("^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(\.(?P<subpatch>0|[1-9]\d*))?)$"),
64 # non-semantic, e.g. 2023.05[-dev] (year and month are separated by a dot)
65 re.compile("^(?P<year>[1-9]{1}[0-9]{3})\.(?P<month>[0-9]{2})(-dev){0,1}$"),
66 # non-semantic, e.g. abcdef0 (exactly seven lower-case hex characters)
67 re.compile("^[a-f0-9]{7}$"),
70 ##### Other functions #####
def is_primitive(var: any) -> bool:
    """Tell whether ``var`` is None or exactly an int, str, float or bool.

    The exact type is compared, so instances of subclasses of these four
    types are not reported as primitive.
    """
    # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!")
    if var is None:
        return True

    return type(var) in (int, str, float, bool)
def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
    """Register ``domain`` and every acceptable peer it reports.

    Args:
        domain: Instance to crawl; must be a non-empty string.
        origin: Passed to instances.add() as the origin of ``domain``; may be None.
        software: Software name of ``domain``. When None it is looked up with
            determine_software().
        script: Passed to instances.add(); must be a non-empty string.
        path: Optional path handed to determine_software() and instances.add().

    Raises:
        ValueError: If a parameter has the wrong type, or ``domain`` or
            ``script`` is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"Parameter software[]={type(software)} is not 'str'")
    elif not isinstance(script, str):
        raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
    elif script == "":
        raise ValueError("Parameter 'script' is empty")

    # Resolve the software only after all parameters are validated, so a
    # missing software name no longer short-circuits the checks on 'script'.
    if software is None:
        # DEBUG: print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
        software = determine_software(domain, path)

    if not instances.is_registered(domain):
        # DEBUG: print("DEBUG: Adding new domain:", domain, origin)
        instances.add(domain, origin, script, path)

    # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software)
    peerlist = fetch_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending_instance_data(domain):
        # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...")
        instances.update_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
    for instance in peerlist:
        if not isinstance(instance, str):
            # tidyup_domain() only accepts strings; None and other values
            # would otherwise abort the whole loop with a ValueError.
            continue

        instance = tidyup_domain(instance)

        if instance == "":
            print("WARNING: Empty instance after tidyup_domain(), domain:", domain)
            continue
        elif not validators.domain(instance.split("/")[0]):
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
            continue
        elif blacklist.is_blacklisted(instance):
            # DEBUG: print("DEBUG: instance is blacklisted:", instance)
            continue

        # DEBUG: print("DEBUG: Handling instance:", instance)
        try:
            if not instances.is_registered(instance):
                # DEBUG: print("DEBUG: Adding new instance:", instance, domain)
                instances.add(instance, domain, script)
        except Exception as exc:
            # One failing peer must not stop the remaining ones;
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            print(f"ERROR: instance='{instance}',exc[{type(exc)}]:'{str(exc)}'")

    # DEBUG: print("DEBUG: EXIT!")
def add_peers(rows: dict) -> list:
    """Collect tidied, non-blacklisted peers from a federation listing.

    Args:
        rows: Mapping that may contain the keys "linked", "allowed" and
            "blocked"; each value is an iterable of peers or None.

    Returns:
        A list with the result of tidyup_domain() for every string peer that
        blacklist.is_blacklisted() does not reject, in the order the keys
        are visited.
    """
    # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!")
    peers = []
    for key in ("linked", "allowed", "blocked"):
        # DEBUG: print(f"DEBUG: Checking key='{key}'")
        if key not in rows or rows[key] is None:
            continue

        for peer in rows[key]:
            if not isinstance(peer, str):
                # tidyup_domain() raises ValueError for anything but str;
                # skip the entry instead of losing the whole list.
                print(f"WARNING: peer[]='{type(peer)}' is not 'str' - SKIPPED!")
                continue

            peer = tidyup_domain(peer)

            if blacklist.is_blacklisted(peer):
                # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted, skipped!")
                continue

            # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...")
            peers.append(peer)

    # DEBUG: print(f"DEBUG: peers()={len(peers)} - EXIT!")
    return peers
def remove_version(software: str) -> str:
    """Strip a trailing version number from a software string.

    The part before the first ";", "," or " - " is examined. Its last
    space-, slash- or dash-separated token is treated as the version
    candidate and removed when one of the module-level ``patterns`` matches
    it. A remaining " version" suffix is cut off with strip_until().

    Args:
        software: Software string, e.g. a generator meta tag value.

    Returns:
        The software name without version, or ``software`` unchanged when no
        separator or no recognisable version number is found.
    """
    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    if "." not in software and " " not in software:
        print(f"WARNING: software='{software}' does not contain a version number.")
        return software

    temp = software
    for delimiter in (";", ",", " - "):
        if delimiter in software:
            temp = software.split(delimiter)[0]
            break

    # Trailing blanks left by the split would yield an empty version token.
    temp = temp.strip()

    # Look for the separator in the shortened string that is actually split;
    # testing the full string could pick a separator 'temp' does not contain.
    version = None
    for separator in (" ", "/", "-"):
        if separator in temp:
            version = temp.split(separator)[-1]
            break

    if version is None:
        # DEBUG: print(f"DEBUG: Was not able to find common seperator, returning untouched software='{software}'")
        return software

    # DEBUG: print(f"DEBUG: Checking {len(patterns)} patterns ...")
    if not any(pattern.match(version) for pattern in patterns):
        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
        return software

    # DEBUG: print(f"DEBUG: Found valid version number: '{version}', removing it ...")
    # Drop the version token and the single separator character before it.
    end = len(temp) - len(version) - 1

    software = temp[0:end].strip()
    if " version" in software:
        # DEBUG: print(f"DEBUG: software='{software}' contains word ' version'")
        software = strip_until(software, " version")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def strip_powered_by(software: str) -> str:
    """Return what follows the first "powered by" in ``software``.

    The remainder is additionally cut at " - " by strip_until().

    Args:
        software: Non-empty software string.

    Returns:
        The trimmed remainder; ``software`` unchanged (with a warning) when
        it does not contain "powered by"; an empty string when nothing
        follows the phrase.

    Raises:
        ValueError: If ``software`` is not a str or is empty.
    """
    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")
    elif "powered by" not in software:
        print(f"WARNING: Cannot find 'powered by' in software='{software}'!")
        return software

    # Search for the same phrase the guard above tested. Looking for
    # "powered by " (trailing blank) could miss and yield index -1.
    software = software.partition("powered by")[2].strip()
    # DEBUG: print(f"DEBUG: software='{software}'")

    if software == "":
        # strip_until() rejects empty input
        return software

    software = strip_until(software, " - ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def strip_hosted_on(software: str) -> str:
    """Return what precedes the first "hosted on" in ``software``.

    The leading part is additionally cut at " - " by strip_until().

    Args:
        software: Non-empty software string.

    Returns:
        The trimmed leading part; ``software`` unchanged (with a warning)
        when it does not contain "hosted on"; an empty string when nothing
        precedes the phrase.

    Raises:
        ValueError: If ``software`` is not a str or is empty.
    """
    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")
    elif "hosted on" not in software:
        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
        return software

    # Keep everything before the phrase. The former 'software[0, end]' used a
    # tuple index and raised TypeError instead of slicing.
    software = software.partition("hosted on")[0].strip()
    # DEBUG: print(f"DEBUG: software='{software}'")

    if software == "":
        # strip_until() rejects empty input
        return software

    software = strip_until(software, " - ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def strip_until(software: str, until: str) -> str:
    """Cut ``software`` at the first occurrence of ``until``.

    Args:
        software: Non-empty string to shorten.
        until: Non-empty marker; it and everything after it is removed.

    Returns:
        The stripped text before the marker. ``software`` is returned
        unchanged when the marker is absent (a warning is printed) or when
        it sits at position 0, where cutting would leave nothing.

    Raises:
        ValueError: If a parameter is not a str or is empty.
    """
    # DEBUG: print(f"DEBUG: software='{software}',until='{until}' - CALLED!")
    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")
    elif not isinstance(until, str):
        raise ValueError(f"Parameter until[]='{type(until)}' is not 'str'")
    elif until == "":
        raise ValueError("Parameter 'until' is empty")
    elif until not in software:
        print(f"WARNING: Cannot find '{until}' in '{software}'!")
        return software

    # Next, strip until part
    end = software.find(until)

    # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
    if end > 0:
        software = software[0:end].strip()

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def remove_pending_error(domain: str):
    """Forget a pending error recorded for ``domain``, if there is one.

    Args:
        domain: Non-empty domain name used as key in ``pending_errors``.

    Raises:
        ValueError: If ``domain`` is not a str or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Prevent updating any pending errors, nodeinfo was found.
    # pop() with a default is a no-op for domains without a pending error,
    # whereas 'del' raises KeyError for them.
    pending_errors.pop(domain, None)

    # DEBUG: print("DEBUG: EXIT!")
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoded ``domain``.

    Raises:
        ValueError: If ``domain`` is not a str or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    digest = hashlib.sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
# Insert one row describing a failed request for 'domain' into the error_log
# table, then delete rows older than the configured cut-off.
# 'response' is annotated as a requests Response, but exception objects and
# plain strings are accepted as well (see the isinstance() checks below).
# Raises ValueError when 'domain' is not a str; a second ValueError reports an
# empty 'domain' (its condition line is not visible in this view).
305 def log_error(domain: str, response: requests.models.Response):
306 # DEBUG: print("DEBUG: domain,response[]:", domain, type(response))
307 if not isinstance(domain, str):
308 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
310 raise ValueError("Parameter 'domain' is empty")
# Logging is opt-in: any value other than "true" (compared case-insensitively)
# for the config key "write_error_log" takes this branch.
311 elif config.get("write_error_log").lower() != "true":
312 # DEBUG: print(f"DEBUG: Writing to error_log is disabled in configuruation file - EXIT!")
316 # DEBUG: print("DEBUG: BEFORE response[]:", type(response))
# Exceptions are flattened into a descriptive string so they take the str
# branch below.
# NOTE(review): json.decoder.JSONDecodeError is already a BaseException, so
# the second isinstance() check is redundant.
317 if isinstance(response, BaseException) or isinstance(response, json.decoder.JSONDecodeError):
318 response = f"response[{type(response)}]='{str(response)}'"
320 # DEBUG: print("DEBUG: AFTER response[]:", type(response))
# String messages are stored with the fixed error code 999.
321 if isinstance(response, str):
322 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[
# Any other value is expected to expose 'status_code'; the remaining bound
# values of both statements are not visible in this view.
328 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[
330 response.status_code,
335 # Cleanup old entries
336 # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})")
# "error_log_cleanup" is subtracted from time.time(), so it is presumably a
# retention period in seconds - TODO confirm against the configuration file.
337 cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")])
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit.
338 except BaseException as exc:
339 print(f"ERROR: failed SQL query: domain='{domain}',exc[{type(exc)}]:'{str(exc)}'")
342 # DEBUG: print("DEBUG: EXIT!")
# Fetch the peers of 'domain'.
# misskey, lemmy and peertube are delegated to their fba.federation modules.
# Every other software (including None) is queried through
# "/api/v1/instance/peers", falling back to "/api/v3/site" when that fails.
# Raises ValueError for a non-str 'domain' or a 'software' that is neither str
# nor None; a second ValueError reports an empty 'domain' (its condition line
# is not visible in this view).
344 def fetch_peers(domain: str, software: str) -> list:
345 # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software={software} - CALLED!")
346 if not isinstance(domain, str):
347 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
349 raise ValueError("Parameter 'domain' is empty")
350 elif not isinstance(software, str) and software is not None:
351 raise ValueError(f"software[]={type(software)} is not 'str'")
# Software-specific fetchers return directly, so the bookkeeping at the end of
# this function (total_peers, last_instance_fetch) is not run for them here.
353 if software == "misskey":
354 # DEBUG: print(f"DEBUG: Invoking misskey.fetch_peers({domain}) ...")
355 return misskey.fetch_peers(domain)
356 elif software == "lemmy":
357 # DEBUG: print(f"DEBUG: Invoking lemmy.fetch_peers({domain}) ...")
358 return lemmy.fetch_peers(domain)
359 elif software == "peertube":
360 # DEBUG: print(f"DEBUG: Invoking peertube.fetch_peers({domain}) ...")
361 return peertube.fetch_peers(domain)
363 # DEBUG: print(f"DEBUG: Fetching peers from '{domain}',software='{software}' ...")
# First attempt: the "/api/v1/instance/peers" endpoint.
366 response = network.fetch_response(domain, "/api/v1/instance/peers", network.api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
367 # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
369 data = json_from_response(response)
370 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
372 if not response.ok or response.status_code >= 400:
373 # DEBUG: print("DEBUG: Was not able to fetch peers, trying alternative ...")
# Second attempt: "/api/v3/site", whose "federated_instances" member is handed
# to add_peers().
374 response = network.fetch_response(domain, "/api/v3/site", network.api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
376 data = json_from_response(response)
377 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
378 if not response.ok or response.status_code >= 400:
379 print("WARNING: Could not reach any JSON API:", domain)
380 instances.update_last_error(domain, response)
# NOTE(review): the statement executed for a list response is not visible in
# this view.
381 elif response.ok and isinstance(data, list):
382 # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
384 elif "federated_instances" in data:
385 # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
386 peers = peers + add_peers(data["federated_instances"])
387 # DEBUG: print("DEBUG: Added instance(s) to peers")
389 print("WARNING: JSON response does not contain 'federated_instances':", domain)
390 instances.update_last_error(domain, response)
392 # DEBUG: print("DEBUG: Querying API was successful:", domain, len(data))
# Any failure above is recorded as the domain's last error rather than raised.
395 except BaseException as exc:
396 print("WARNING: Some error during fetch_peers():", domain, exc)
397 instances.update_last_error(domain, exc)
# Bookkeeping: store the peer count and refresh the fetch timestamp.
399 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
400 instances.set_data("total_peers", domain, len(peers))
402 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
403 instances.update_last_instance_fetch(domain)
405 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
# Fetch the nodeinfo document of 'domain'.
# Auto-discovery through fetch_wellknown_nodeinfo() is tried first; if it
# yields nothing, a list of static request paths is probed one by one.
# Raises ValueError for a non-str 'domain' or a 'path' that is neither str nor
# None; a second ValueError reports an empty 'domain' (its condition line is
# not visible in this view).
# NOTE(review): annotated '-> list', but determine_software() treats the
# result as a dict.
408 def fetch_nodeinfo(domain: str, path: str = None) -> list:
409 # DEBUG: print(f"DEBUG: domain='{domain}',path={path} - CALLED!")
410 if not isinstance(domain, str):
411 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
413 raise ValueError("Parameter 'domain' is empty")
414 elif not isinstance(path, str) and path is not None:
415 raise ValueError(f"Parameter path[]={type(path)} is not 'str'")
417 # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
418 nodeinfo = fetch_wellknown_nodeinfo(domain)
420 # DEBUG: print(f"DEBUG: nodeinfo({len(nodeinfo)})={nodeinfo}")
# A non-empty auto-discovery result short-circuits the static probing.
421 if len(nodeinfo) > 0:
422 # DEBUG: print("DEBUG: nodeinfo()={len(nodeinfo))} - EXIT!")
# Static request paths (only two entries are visible in this view).
426 "/nodeinfo/2.1.json",
428 "/nodeinfo/2.0.json",
435 for request in request_paths:
# NOTE(review): the last operand compares 'path' with a string built from
# 'path' itself, so it is true for every non-empty 'path'. 'request' is never
# consulted although the DEBUG message below mentions it. Presumably
# f"https://{domain}{request}" or 'request' was intended - confirm.
436 if path is not None and path != "" and path != f"https://{domain}{path}":
437 # DEBUG: print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
441 # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
442 response = network.fetch_response(domain, request, network.api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
444 data = json_from_response(response)
445 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
# A dict response counts as success: record how and where nodeinfo was found.
446 if response.ok and isinstance(data, dict):
447 # DEBUG: print("DEBUG: Success:", request)
448 instances.set_data("detection_mode", domain, "STATIC_CHECK")
449 instances.set_data("nodeinfo_url" , domain, request)
451 elif response.ok and isinstance(data, list):
452 print(f"UNSUPPORTED: domain='{domain}' returned a list: '{data}'")
454 elif not response.ok or response.status_code >= 400:
455 print("WARNING: Failed fetching nodeinfo from domain:", domain)
456 instances.update_last_error(domain, response)
# Failures of a single request are recorded as the domain's last error.
459 except BaseException as exc:
460 # DEBUG: print("DEBUG: Cannot fetch API request:", request)
461 instances.update_last_error(domain, exc)
464 # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!")
# Auto-discover nodeinfo: fetch "/.well-known/nodeinfo" from 'domain' and
# follow links whose "rel" value is listed in nodeinfo_identifier.
# Raises ValueError for a non-str 'domain'; a second ValueError reports an
# empty 'domain' (its condition line is not visible in this view).
# NOTE(review): annotated '-> list', but a successful lookup yields a dict.
467 def fetch_wellknown_nodeinfo(domain: str) -> list:
468 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
469 if not isinstance(domain, str):
470 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
472 raise ValueError("Parameter 'domain' is empty")
474 # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain)
478 response = network.fetch_response(domain, "/.well-known/nodeinfo", network.api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
480 data = json_from_response(response)
481 # DEBUG: print("DEBUG: domain,response.ok,data[]:", domain, response.ok, type(data))
482 if response.ok and isinstance(data, dict):
# NOTE(review): 'nodeinfo' is read below but its assignment is not visible in
# this view; presumably it refers to the document parsed above - confirm.
484 # DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain)
485 if "links" in nodeinfo:
486 # DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"]))
# Each link is expected to carry "rel" and "href"; a missing key raises
# KeyError, which the except clause below records as the last error.
487 for link in nodeinfo["links"]:
488 # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
489 if link["rel"] in nodeinfo_identifier:
490 # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
# The linked document is an absolute URL, hence fetch_url(); note that the
# generic connection/read timeouts are used here, not the nodeinfo ones.
491 response = fetch_url(link["href"], network.api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
493 data = json_from_response(response)
494 # DEBUG: print("DEBUG: href,response.ok,response.status_code:", link["href"], response.ok, response.status_code)
# A dict response counts as success: record how and where nodeinfo was found.
495 if response.ok and isinstance(data, dict):
496 # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data))
497 instances.set_data("detection_mode", domain, "AUTO_DISCOVERY")
498 instances.set_data("nodeinfo_url" , domain, link["href"])
501 print("WARNING: Unknown 'rel' value:", domain, link["rel"])
503 print("WARNING: nodeinfo does not contain 'links':", domain)
# Any failure above is recorded as the domain's last error rather than raised.
505 except BaseException as exc:
506 print("WARNING: Failed fetching .well-known info:", domain)
507 instances.update_last_error(domain, exc)
510 # DEBUG: print("DEBUG: Returning data[]:", type(data))
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Guess the software of ``domain`` from the HTML served at ``path``.

    The page's <meta name="generator"> tag is preferred; if it is absent the
    <meta property="og:site_name"> tag is used. The found text is then
    cleaned from version numbers and "powered by" / "hosted on" / " by " /
    " see " phrases.

    Args:
        domain: Non-empty domain name to query.
        path: Non-empty request path, "/" by default.

    Returns:
        The cleaned software name, or None when nothing usable was found or
        the request failed (the failure is recorded with
        instances.update_last_error()).

    Raises:
        ValueError: If a parameter is not a str or is empty.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    software = None

    try:
        # DEBUG: print(f"DEBUG: Fetching path='{path}' from '{domain}' ...")
        response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print("DEBUG: Search for <meta name='generator'>:", domain)
            doc = bs4.BeautifulSoup(response.text, "html.parser")

            generator = doc.find("meta", {"name": "generator"})
            site_name = doc.find("meta", {"property": "og:site_name"})

            # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
            if isinstance(generator, bs4.element.Tag):
                software = tidyup_domain(generator.get("content"))
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set_data("detection_mode", domain, "GENERATOR")
                remove_pending_error(domain)
            elif isinstance(site_name, bs4.element.Tag):
                # Assign to 'software'; the former misspelled name 'sofware'
                # silently discarded the og:site_name result.
                software = tidyup_domain(site_name.get("content"))
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set_data("detection_mode", domain, "SITE_NAME")
                remove_pending_error(domain)

    except Exception as exc:
        # Best effort: remember the failure and carry on with no software.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        # DEBUG: print(f"DEBUG: Cannot fetch / from '{domain}':", exc)
        instances.update_last_error(domain, exc)

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and software == "":
        # DEBUG: print(f"DEBUG: Corrected empty string to None for software of domain='{domain}'")
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = remove_version(software)

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and " powered by " in software:
        software = remove_version(strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        software = remove_version(strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        software = strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        software = strip_until(software, " see ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine which software ``domain`` runs.

    The nodeinfo document from fetch_nodeinfo() is consulted first. When it
    is missing, reports an error, or carries no software name,
    fetch_generator_from_path() is used instead. Known forks are mapped to
    their upstream name and version numbers / "powered by" phrases are
    removed.

    Args:
        domain: Non-empty domain name.
        path: Optional nodeinfo path passed on to fetch_nodeinfo().

    Returns:
        The software name, or None when it could not be determined.

    Raises:
        ValueError: If ``domain`` is not a non-empty str or ``path`` is
            neither str nor None.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
    data = fetch_nodeinfo(domain, path)

    # DEBUG: print("DEBUG: data[]:", type(data))
    if not isinstance(data, dict) or len(data) == 0:
        # DEBUG: print("DEBUG: Could not determine software type:", domain)
        return fetch_generator_from_path(domain)

    # DEBUG: print("DEBUG: data():", len(data), data)
    if "status" in data and data["status"] == "error" and "message" in data:
        print("WARNING: JSON response is an error:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        print("WARNING: JSON response contains only a message:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "software" not in data or "name" not in data["software"]:
        # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
        return fetch_generator_from_path(domain)

    software = tidyup_domain(data["software"]["name"])

    # DEBUG: print("DEBUG: sofware after tidyup_domain():", software)
    if software in ["akkoma", "rebased"]:
        # DEBUG: print("DEBUG: Setting pleroma:", domain, software)
        software = "pleroma"
    elif software in ["hometown", "ecko"]:
        # DEBUG: print("DEBUG: Setting mastodon:", domain, software)
        software = "mastodon"
    elif software in ["calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
        # DEBUG: print("DEBUG: Setting misskey:", domain, software)
        software = "misskey"
    elif software.find("/") > 0:
        print("WARNING: Spliting of slash:", software)
        software = tidyup_domain(software.split("/")[-1])
    elif software.find("|") > 0:
        print("WARNING: Spliting of pipe:", software)
        software = tidyup_domain(software.split("|")[0])
    elif "powered by" in software:
        software = strip_powered_by(software)
    elif " by " in software:
        software = strip_until(software, " by ")
    elif " see " in software:
        software = strip_until(software, " see ")

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if software == "":
        print("WARNING: tidyup_domain() left no software name behind:", domain)
        software = None

    # Test for None explicitly: str(None) is "None", never "", so a
    # string comparison would skip the fallback and then evaluate
    # '"." in None', which raises TypeError.
    if software is None:
        # DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...")
        software = fetch_generator_from_path(domain)
    elif "." in software or " " in software:
        # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = remove_version(software)

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and "powered by" in software:
        software = remove_version(strip_powered_by(software))

    # DEBUG: print("DEBUG: Returning domain,software:", domain, software)
    return software
def tidyup_reason(reason: str) -> str:
    """Trim surrounding whitespace from ``reason`` and replace "â" with '"'.

    Raises:
        ValueError: If ``reason`` is not a str.
    """
    # DEBUG: print(f"DEBUG: reason='{reason}' - CALLED!")
    if not isinstance(reason, str):
        raise ValueError(f"Parameter reason[]={type(reason)} is not 'str'")

    # Trim first, then turn every "â" into a double quote
    cleaned = reason.strip().replace("â", "\"")

    # DEBUG: print(f"DEBUG: reason='{cleaned}' - EXIT!")
    return cleaned
def tidyup_domain(domain: str) -> str:
    """Normalise a domain-like string.

    The value is lower-cased and trimmed, then stripped of a trailing dot,
    an http(s) scheme, a trailing slash, a port number, a leading "@", a
    "user@" prefix and any "/profile/..." or "/users/..." suffix.

    Args:
        domain: Raw string, e.g. "https://Example.com/users/alice".

    Returns:
        The normalised string, e.g. "example.com".

    Raises:
        ValueError: If ``domain`` is not a str.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")

    # All lower-case and strip spaces out + last dot
    domain = domain.lower().strip().rstrip(".")

    # No protocol, sometimes without the slashes
    domain = re.sub(r"^https?:/*", "", domain)

    # No trailing slash; removed before the port so "host:8443/" is handled
    domain = re.sub(r"/$", "", domain)

    # No port number
    domain = re.sub(r":\d+$", "", domain)

    # No leading @ sign
    domain = re.sub(r"^@", "", domain)

    # No individual users in block lists
    domain = re.sub(r".+@", "", domain)

    # str.find() returns -1 (truthy) when nothing is found, so it must not be
    # used as a condition; partition() is a no-op for an absent marker.
    for marker in ("/profile/", "/users/"):
        domain = domain.partition(marker)[0]

    # DEBUG: print(f"DEBUG: domain='{domain}' - EXIT!")
    return domain
def json_from_response(response: requests.models.Response) -> list:
    """Decode the JSON body of ``response``.

    Returns:
        Whatever response.json() yields, or an empty list when the body is
        blank or cannot be decoded as JSON.

    Raises:
        ValueError: If ``response`` is not a requests Response.
    """
    # DEBUG: print(f"DEBUG: response[]={type(response)} - CALLED!")
    if not isinstance(response, requests.models.Response):
        raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")

    if response.text.strip() == "":
        # Nothing to decode
        return list()

    # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...")
    try:
        return response.json()
    except json.decoder.JSONDecodeError:
        return list()
def has_key(lists: list, key: str, value: any) -> bool:
    """Tell whether any dict in ``lists`` maps ``key`` to ``value``.

    Rows are examined in order and the search stops at the first match, so
    rows after it are not validated.

    Raises:
        ValueError: If ``lists`` is not a list, ``key`` is not a non-empty
            str, or an examined row is not a dict.
        KeyError: If an examined row lacks ``key``.
    """
    # DEBUG: print(f"DEBUG: lists()={len(lists)},key='{key}',value[]='{type(value)}' - CALLED!")
    if not isinstance(lists, list):
        raise ValueError(f"Parameter lists[]='{type(lists)}' is not 'list'")
    if not isinstance(key, str):
        raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'")
    if key == "":
        raise ValueError("Parameter 'key' is empty")

    for entry in lists:
        if not isinstance(entry, dict):
            raise ValueError(f"row[]='{type(entry)}' is not 'dict'")
        if key not in entry:
            raise KeyError(f"Cannot find key='{key}'")
        if entry[key] == value:
            return True

    return False
# Extract domain/reason pairs from the rows of an HTML table.
# Raises ValueError when 'tag' is not a bs4 Tag and KeyError when it contains
# no <tr> element.
753 def find_domains(tag: bs4.element.Tag) -> list:
754 # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!")
755 if not isinstance(tag, bs4.element.Tag):
756 raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")
757 elif len(tag.select("tr")) == 0:
758 raise KeyError("No table rows found in table!")
761 for element in tag.select("tr"):
762 # DEBUG: print(f"DEBUG: element[]={type(element)}")
# Rows without any <td> (e.g. header rows built from <th>) carry no data.
763 if not element.find("td"):
764 # DEBUG: print("DEBUG: Skipping element, no <td> found")
# First cell is the domain, second cell the reason.
# NOTE(review): a row with a single <td> raises IndexError on the [1] access.
767 domain = tidyup_domain(element.find("td").text)
768 reason = tidyup_reason(element.findAll("td")[1].text)
770 # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
772 if blacklist.is_blacklisted(domain):
773 print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
# Special case for one known cell that lists several domains at once; the
# entries created for it are only partly visible in this view.
775 elif domain == "gab.com/.ai, develop.gab.com":
776 # DEBUG: print("DEBUG: Multiple domains detected in one row")
786 "domain": "develop.gab.com",
790 elif not validators.domain(domain):
791 print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
794 # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
800 # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Response:
    """Fetch an absolute URL through network.fetch_response().

    The URL is split with urlparse(); its hostname and its path (plus the
    query string, if any) are passed on together with ``headers`` and
    ``timeout``.

    Raises:
        ValueError: If ``url`` is not a non-empty str, ``headers`` is not a
            dict or ``timeout`` is not a tuple.
    """
    # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!")
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    if url == "":
        raise ValueError("Parameter 'url' is empty")
    if not isinstance(headers, dict):
        raise ValueError(f"Parameter headers[]='{type(headers)}' is not 'dict'")
    if not isinstance(timeout, tuple):
        raise ValueError(f"Parameter timeout[]='{type(timeout)}' is not 'tuple'")

    # DEBUG: print(f"DEBUG: Parsing url='{url}'")
    parts = urlparse(url)

    # Append the query only when there is one, to avoid a trailing "?"
    target = f"{parts.path}"
    if parts.query != "":
        target = f"{parts.path}?{parts.query}"

    # DEBUG: print(f"DEBUG: components[{type(parts)}]={parts}")
    return network.fetch_response(parts.hostname, target, headers, timeout)