1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
26 from urllib.parse import urlparse
28 from fba import blacklist
30 from fba import config
31 from fba import instances
32 from fba import network
34 from fba.federation import lemmy
35 from fba.federation import misskey
36 from fba.federation import peertube
# Pending errors that still need to be written to the database, keyed by domain
pending_errors = {}

# "rel" identifiers (no real URLs)
nodeinfo_identifier = [
    "https://nodeinfo.diaspora.software/ns/schema/2.1",
    "https://nodeinfo.diaspora.software/ns/schema/2.0",
    "https://nodeinfo.diaspora.software/ns/schema/1.1",
    "https://nodeinfo.diaspora.software/ns/schema/1.0",
    "http://nodeinfo.diaspora.software/ns/schema/2.1",
    "http://nodeinfo.diaspora.software/ns/schema/2.0",
    "http://nodeinfo.diaspora.software/ns/schema/1.1",
    "http://nodeinfo.diaspora.software/ns/schema/1.0",
]

# HTTP headers for non-API requests
headers = {
    "User-Agent": config.get("useragent"),
}

# HTTP headers for API requests
api_headers = {
    "User-Agent": config.get("useragent"),
    "Content-Type": "application/json",
}

# Connect to database
connection = sqlite3.connect("blocks.db")
cursor = connection.cursor()

# Pattern instances for version numbers. Raw strings keep the regex escapes
# (\. and \d) from being parsed as (invalid) string escape sequences.
patterns = [
    # semantic version number (with optional v|V prefix)
    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"),
    # non-semantic, e.g. 1.2.3.4
    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(\.(?P<subpatch>0|[1-9]\d*))?)$"),
    # non-semantic, e.g. 2023.05[-dev]
    re.compile(r"^(?P<year>[1-9]{1}[0-9]{3})\.(?P<month>[0-9]{2})(-dev){0,1}$"),
    # non-semantic, e.g. abcdef0
    re.compile(r"^[a-f0-9]{7}$"),
]
81 ##### Other functions #####
def is_primitive(var: object) -> bool:
    """Return True when var is None or exactly an int, str, float or bool.

    The exact type is compared, so instances of subclasses of these types
    are not reported as primitive.
    """
    return var is None or type(var) in {int, str, float, bool}
def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
    """Register domain and every valid, non-blacklisted peer it reports.

    Args:
        domain: Domain whose peers are fetched; must be a non-empty str.
        origin: Domain that led to this one, or None.
        software: Software name of the domain; determined through
            determine_software() when None.
        script: Name of the invoking script; must be a non-empty str.
        path: Optional nodeinfo path handed to determine_software() and
            instances.add().

    Raises:
        ValueError: When a parameter has the wrong type or is empty.
    """
    # NOTE: this is a single elif chain, so 'script' is not validated on the
    # path where software first had to be determined.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif software is None:
        print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
        software = determine_software(domain, path)
        print(f"DEBUG: Determined software='{software}' for domain='{domain}'")
    elif not isinstance(software, str):
        raise ValueError(f"Parameter software[]={type(software)} is not 'str'")
    elif not isinstance(script, str):
        raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
    elif script == "":
        raise ValueError("Parameter 'script' is empty")

    if not instances.is_registered(domain):
        instances.add(domain, origin, script, path)

    peerlist = fetch_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending_instance_data(domain):
        # Flush nodeinfo data collected while fetching the peers
        instances.update_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
    for instance in peerlist:
        if instance is None:
            # Skip "None" types as tidyup_domain() cannot parse them
            continue

        instance = tidyup_domain(instance)

        if instance == "":
            print("WARNING: Empty instance after tidyup_domain(), domain:", domain)
            continue
        elif not validators.domain(instance.split("/")[0]):
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
            continue
        elif blacklist.is_blacklisted(instance):
            continue

        try:
            if not instances.is_registered(instance):
                instances.add(instance, domain, script)
        except Exception as exception:
            # Best effort: a failing peer must not abort the whole run, but
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            print(f"ERROR: instance='{instance}',exception[{type(exception)}]:'{str(exception)}'")
            continue
def add_peers(rows: dict) -> list:
    """Collect the tidied peers from the "linked", "allowed" and "blocked" lists.

    Keys that are absent or set to None are ignored, and peers that are
    blacklisted after tidyup_domain() are left out.
    """
    collected = []

    for key in ("linked", "allowed", "blocked"):
        if key not in rows or rows[key] is None:
            continue

        for raw_peer in rows[key]:
            cleaned = tidyup_domain(raw_peer)
            if not blacklist.is_blacklisted(cleaned):
                collected.append(cleaned)

    return collected
def remove_version(software: str) -> str:
    """Strip a trailing version number from a software name.

    Anything after the first ";", "," or " - " is dropped first. The last
    token of the remainder (split on " ", "/" or "-") is taken as the
    version candidate and only removed when it matches one of the
    module-level `patterns`. In every other case the input is returned
    untouched.

    Raises:
        ValueError: When software is not a str.
    """
    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif "." not in software and " " not in software:
        print(f"WARNING: software='{software}' does not contain a version number.")
        return software

    # Drop trailing remarks such as "; extra" or " - fork"
    temp = software
    for separator in (";", ",", " - "):
        if separator in software:
            temp = software.split(separator)[0]
            break

    # The separator must be looked up in temp (the text that is actually
    # split), otherwise the whole of temp is mistaken for the version.
    for separator in (" ", "/", "-"):
        if separator in temp:
            version = temp.split(separator)[-1]
            break
    else:
        # No common separator found, return software untouched
        return software

    if not any(pattern.match(version) for pattern in patterns):
        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
        return software

    # Cut off the version together with the one-character separator before it
    end = len(temp) - len(version) - 1
    software = temp[0:end].strip()
    if " version" in software:
        software = strip_until(software, " version")

    return software
def strip_powered_by(software: str) -> str:
    """Return what follows "powered by " in software, cut at the first " - ".

    The input is returned untouched (with a warning) when the marker is
    not present.

    Raises:
        ValueError: When software is not a str or is empty.
    """
    # One marker for both the guard and the search; the trailing space
    # keeps find() from returning -1 after the guard has passed.
    marker = "powered by "

    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")
    elif marker not in software:
        print(f"WARNING: Cannot find 'powered by' in software='{software}'!")
        return software

    start = software.find(marker)
    software = software[start + len(marker):].strip()

    # strip_until() raises on empty input, so only delegate when there
    # really is a " - " suffix to remove.
    if " - " in software:
        software = strip_until(software, " - ")

    return software
def strip_hosted_on(software: str) -> str:
    """Return what precedes "hosted on " in software, cut at the first " - ".

    The input is returned untouched (with a warning) when the marker is
    not present.

    Raises:
        ValueError: When software is not a str or is empty.
    """
    # One marker for both the guard and the search; the trailing space
    # keeps find() from returning -1 after the guard has passed.
    marker = "hosted on "

    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")
    elif marker not in software:
        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
        return software

    end = software.find(marker)

    # Slice with a colon; indexing a str with the tuple (0, end) raises TypeError.
    software = software[0:end].strip()

    # strip_until() raises on empty input, so only delegate when there
    # really is a " - " suffix to remove.
    if " - " in software:
        software = strip_until(software, " - ")

    return software
def strip_until(software: str, until: str) -> str:
    """Return the part of software before the first occurrence of until.

    The result is stripped of surrounding whitespace. When until does not
    occur, a warning is printed and software is returned unchanged.

    Raises:
        ValueError: When either parameter is not a str or is empty.
    """
    if not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    if software == "":
        raise ValueError("Parameter 'software' is empty")
    if not isinstance(until, str):
        raise ValueError(f"Parameter until[]='{type(until)}' is not 'str'")
    if until == "":
        raise ValueError("Parameter 'until' is empty")

    # partition() splits at the first occurrence; an empty separator slot
    # means "until" was not found.
    head, found, _tail = software.partition(until)
    if not found:
        print(f"WARNING: Cannot find '{until}' in '{software}'!")
        return software

    return head.strip()
def remove_pending_error(domain: str):
    """Forget a pending error for domain, if one is recorded.

    Raises:
        ValueError: When domain is not a str or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Prevent updating any pending errors, nodeinfo was found. pop() with a
    # default tolerates a missing key without a catch-all exception handler.
    pending_errors.pop(domain, None)
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoded domain.

    Raises:
        ValueError: When domain is not a str or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    digest = hashlib.sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
# Store an error for `domain` in the error_log table, then purge old entries.
# `domain` must be a str (ValueError otherwise). A `response` that is an
# exception is converted with str(); string responses are inserted with the
# fixed error code 999, everything else with response.status_code.
# NOTE(review): statements are missing from this listing (the empty-domain
# guard condition, the `try:` header, the parameter lists of both INSERTs, the
# `else:` between them and whatever follows the final print) - confirm against
# the complete file before changing the logic.
316 def log_error(domain: str, response: requests.models.Response):
317 # DEBUG: print("DEBUG: domain,response[]:", domain, type(response))
318 if not isinstance(domain, str):
319 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
321 raise ValueError("Parameter 'domain' is empty")
324 # DEBUG: print("DEBUG: BEFORE response[]:", type(response))
# The second isinstance() is redundant: JSONDecodeError is already a BaseException.
325 if isinstance(response, BaseException) or isinstance(response, json.decoder.JSONDecodeError):
326 response = str(response)
328 # DEBUG: print("DEBUG: AFTER response[]:", type(response))
329 if isinstance(response, str):
330 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[
336 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[
338 response.status_code,
343 # Cleanup old entries
344 # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})")
# Rows whose `created` value is older than now minus config "error_log_cleanup" are deleted.
345 cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")])
# NOTE(review): BaseException also catches KeyboardInterrupt/SystemExit - confirm this is intended.
346 except BaseException as exception:
347 print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
350 # DEBUG: print("DEBUG: EXIT!")
# Fetch the peer list of `domain`.
# "misskey", "lemmy" and "peertube" are delegated to the matching
# fba.federation module. Everything else is queried through
# /api/v1/instance/peers, with /api/v3/site as the fallback when that request
# fails. Afterwards the number of peers is stored as "total_peers" and the
# last instance fetch is updated.
# Raises ValueError when `domain` is not a str or `software` is neither str nor None.
# NOTE(review): statements are missing from this listing (the empty-domain
# guard condition, the initialisation of `peers`, the `try:` header, several
# branch bodies/`else:` lines and the final return) - confirm against the
# complete file before changing the logic.
352 def fetch_peers(domain: str, software: str) -> list:
353 # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software={software} - CALLED!")
354 if not isinstance(domain, str):
355 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
357 raise ValueError("Parameter 'domain' is empty")
358 elif not isinstance(software, str) and software is not None:
359 raise ValueError(f"software[]={type(software)} is not 'str'")
361 if software == "misskey":
362 # DEBUG: print(f"DEBUG: Invoking misskey.fetch_peers({domain}) ...")
363 return misskey.fetch_peers(domain)
364 elif software == "lemmy":
365 # DEBUG: print(f"DEBUG: Invoking lemmy.fetch_peers({domain}) ...")
366 return lemmy.fetch_peers(domain)
367 elif software == "peertube":
368 # DEBUG: print(f"DEBUG: Invoking peertube.fetch_peers({domain}) ...")
369 return peertube.fetch_peers(domain)
371 # DEBUG: print(f"DEBUG: Fetching peers from '{domain}',software='{software}' ...")
# First attempt: the generic peers endpoint
374 response = network.fetch_response(domain, "/api/v1/instance/peers", api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
376 data = json_from_response(response)
378 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
379 if not response.ok or response.status_code >= 400:
380 # DEBUG: print(f"DEBUG: Was not able to fetch peers, trying alternative ...")
# Second attempt: /api/v3/site, whose "federated_instances" entry is fed to add_peers()
381 response = network.fetch_response(domain, "/api/v3/site", api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
383 data = json_from_response(response)
384 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
385 if not response.ok or response.status_code >= 400:
386 print("WARNING: Could not reach any JSON API:", domain)
387 instances.update_last_error(domain, response)
# NOTE(review): the body of this list branch is not visible here - confirm what it does.
388 elif response.ok and isinstance(data, list):
389 # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
391 elif "federated_instances" in data:
392 # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
393 peers = peers + add_peers(data["federated_instances"])
394 # DEBUG: print("DEBUG: Added instance(s) to peers")
396 print("WARNING: JSON response does not contain 'federated_instances':", domain)
397 instances.update_last_error(domain, response)
399 # DEBUG: print("DEBUG: Querying API was successful:", domain, len(data))
# NOTE(review): BaseException also catches KeyboardInterrupt/SystemExit - confirm this is intended.
402 except BaseException as exception:
403 print("WARNING: Some error during get():", domain, exception)
404 instances.update_last_error(domain, exception)
406 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
407 instances.set("total_peers", domain, len(peers))
409 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
410 instances.update_last_instance_fetch(domain)
412 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
# Fetch nodeinfo data for `domain`.
# The .well-known discovery (fetch_wellknown_nodeinfo) is tried first and its
# result is used when non-empty. Otherwise a list of static request paths is
# probed; the first path answering with a JSON object is recorded as
# "nodeinfo_url" with detection mode "STATIC_CHECK".
# Raises ValueError when `domain` is not a str or `path` is neither str nor None.
# NOTE(review): statements are missing from this listing (the empty-domain
# guard condition, most of the request path list, the `try:` header, loop
# control statements and the returns) - confirm against the complete file
# before changing the logic.
# NOTE(review): the return annotation says list, yet the success branch below
# checks for a dict - presumably a dict is returned; verify against callers.
415 def fetch_nodeinfo(domain: str, path: str = None) -> list:
416 # DEBUG: print(f"DEBUG: domain='{domain}',path={path} - CALLED!")
417 if not isinstance(domain, str):
418 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
420 raise ValueError("Parameter 'domain' is empty")
421 elif not isinstance(path, str) and path is not None:
422 raise ValueError(f"Parameter path[]={type(path)} is not 'str'")
424 # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
425 nodeinfo = fetch_wellknown_nodeinfo(domain)
427 # DEBUG: print(f"DEBUG: nodeinfo({len(nodeinfo)})={nodeinfo}")
428 if len(nodeinfo) > 0:
429 # DEBUG: print("DEBUG: nodeinfo()={len(nodeinfo))} - EXIT!")
433 "/nodeinfo/2.1.json",
435 "/nodeinfo/2.0.json",
442 for request in request_paths:
# NOTE(review): this compares `path` with itself prefixed by "https://{domain}",
# which is unequal for every non-empty path. The debug message below talks
# about `request`, so f"https://{domain}{request}" was probably intended - confirm.
443 if path is not None and path != "" and path != f"https://{domain}{path}":
444 # DEBUG: print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
448 # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
449 response = network.fetch_response(domain, request, api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
451 data = json_from_response(response)
452 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
453 if response.ok and isinstance(data, dict):
454 # DEBUG: print("DEBUG: Success:", request)
455 instances.set("detection_mode", domain, "STATIC_CHECK")
456 instances.set("nodeinfo_url" , domain, request)
458 elif response.ok and isinstance(data, list):
459 print(f"UNSUPPORTED: domain='{domain}' returned a list: '{data}'")
461 elif not response.ok or response.status_code >= 400:
462 print("WARNING: Failed fetching nodeinfo from domain:", domain)
463 instances.update_last_error(domain, response)
# NOTE(review): BaseException also catches KeyboardInterrupt/SystemExit - confirm this is intended.
466 except BaseException as exception:
467 # DEBUG: print("DEBUG: Cannot fetch API request:", request)
468 instances.update_last_error(domain, exception)
471 # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!")
# Discover nodeinfo for `domain` through /.well-known/nodeinfo.
# Each entry of the document's "links" list whose "rel" is one of
# nodeinfo_identifier is fetched with fetch_url(); a JSON object answer is
# recorded as "nodeinfo_url" with detection mode "AUTO_DISCOVERY".
# Raises ValueError when `domain` is not a str.
# NOTE(review): statements are missing from this listing (the empty-domain
# guard condition, the initial value of `data`, the `try:` header, the
# assignment of `nodeinfo`, `else:`/loop control lines and the return) -
# confirm against the complete file before changing the logic.
# NOTE(review): the return annotation says list, yet the branches below check
# for a dict - presumably a dict is returned; verify against callers.
474 def fetch_wellknown_nodeinfo(domain: str) -> list:
475 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
476 if not isinstance(domain, str):
477 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
479 raise ValueError("Parameter 'domain' is empty")
481 # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain)
485 response = network.fetch_response(domain, "/.well-known/nodeinfo", api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
487 data = json_from_response(response)
488 # DEBUG: print("DEBUG: domain,response.ok,data[]:", domain, response.ok, type(data))
489 if response.ok and isinstance(data, dict):
491 # DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain)
492 if "links" in nodeinfo:
493 # DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"]))
494 for link in nodeinfo["links"]:
495 # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
496 if link["rel"] in nodeinfo_identifier:
497 # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
# NOTE(review): the timeout is handed over as a tuple - make sure fetch_url() accepts tuples.
# NOTE(review): this uses the general timeouts, not the nodeinfo_* ones used above - confirm.
498 response = fetch_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
# `data` is overwritten here with the body of the linked document.
500 data = json_from_response(response)
501 # DEBUG: print("DEBUG: href,response.ok,response.status_code:", link["href"], response.ok, response.status_code)
502 if response.ok and isinstance(data, dict):
503 # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data))
504 instances.set("detection_mode", domain, "AUTO_DISCOVERY")
505 instances.set("nodeinfo_url" , domain, link["href"])
508 print("WARNING: Unknown 'rel' value:", domain, link["rel"])
510 print("WARNING: nodeinfo does not contain 'links':", domain)
# NOTE(review): BaseException also catches KeyboardInterrupt/SystemExit - confirm this is intended.
512 except BaseException as exception:
513 print("WARNING: Failed fetching .well-known info:", domain)
514 instances.update_last_error(domain, exception)
517 # DEBUG: print("DEBUG: Returning data[]:", type(data))
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Guess the software of domain from the HTML meta tags found at path.

    <meta name="generator"> is preferred, <meta property="og:site_name"> is
    the fallback. The value found is cleaned up (version number and
    "powered by"/"hosted on"/" by "/" see " phrases are removed).

    Returns:
        The software name, or None when nothing usable was found.

    Raises:
        ValueError: When domain or path is not a str or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    software = None

    try:
        response = network.fetch_response(domain, path, headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.status_code < 300 and len(response.text) > 0:
            doc = bs4.BeautifulSoup(response.text, "html.parser")

            generator = doc.find("meta", {"name": "generator"})
            site_name = doc.find("meta", {"property": "og:site_name"})

            if isinstance(generator, bs4.element.Tag):
                software = tidyup_domain(generator.get("content"))
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set("detection_mode", domain, "GENERATOR")
                remove_pending_error(domain)
            elif isinstance(site_name, bs4.element.Tag):
                # Assign to "software"; a misspelled target name would
                # silently discard the detected value.
                software = tidyup_domain(site_name.get("content"))
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set("detection_mode", domain, "SITE_NAME")
                remove_pending_error(domain)

    except Exception as exception:
        # Best effort: remember the failure and fall through with software=None.
        # KeyboardInterrupt/SystemExit are allowed to propagate.
        instances.update_last_error(domain, exception)

    if isinstance(software, str) and software == "":
        # Corrected empty string to None
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        software = remove_version(software)

    if isinstance(software, str) and " powered by " in software:
        software = remove_version(strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        software = remove_version(strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        software = strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        software = strip_until(software, " see ")

    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine the software running on domain.

    nodeinfo ([software][name]) is the primary source. Whenever it is
    missing, empty or an error response, the generator meta tag of the
    start page is used instead (fetch_generator_from_path). Known forks are
    mapped to their upstream name and version numbers are removed.

    Returns:
        The software name, or None when it could not be determined.

    Raises:
        ValueError: When domain is not a non-empty str or path is neither
            str nor None.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    data = fetch_nodeinfo(domain, path)

    if not isinstance(data, dict) or len(data) == 0:
        # Could not determine software type from nodeinfo
        return fetch_generator_from_path(domain)

    if "status" in data and data["status"] == "error" and "message" in data:
        print("WARNING: JSON response is an error:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        print("WARNING: JSON response contains only a message:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "software" not in data or "name" not in data["software"]:
        # JSON response does not include [software][name], fetching / instead
        return fetch_generator_from_path(domain)

    software = tidyup_domain(data["software"]["name"])

    # Map forks to their upstream and strip decorations from free-text names
    if software in ["akkoma", "rebased"]:
        software = "pleroma"
    elif software in ["hometown", "ecko"]:
        software = "mastodon"
    elif software in ["calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
        software = "misskey"
    elif software.find("/") > 0:
        print("WARNING: Spliting of slash:", software)
        software = tidyup_domain(software.split("/")[-1])
    elif software.find("|") > 0:
        print("WARNING: Spliting of pipe:", software)
        software = tidyup_domain(software.split("|")[0])
    elif "powered by" in software:
        software = strip_powered_by(software)
    elif " by " in software:
        software = strip_until(software, " by ")
    elif " see " in software:
        software = strip_until(software, " see ")

    if software == "":
        # Nothing usable is left, so fall back to the generator tag rather
        # than running string checks on an empty/None value.
        print("WARNING: tidyup_domain() left no software name behind:", domain)
        software = fetch_generator_from_path(domain)
    elif "." in software or " " in software:
        # May contain a version number
        software = remove_version(software)

    if isinstance(software, str) and "powered by" in software:
        software = remove_version(strip_powered_by(software))

    return software
def tidyup_reason(reason: str) -> str:
    """Return reason without surrounding whitespace and with every "â" turned into a double quote.

    Raises:
        ValueError: When reason is not a str.
    """
    if not isinstance(reason, str):
        raise ValueError(f"Parameter reason[]={type(reason)} is not 'str'")

    # Strip string, then replace â with "
    trimmed = reason.strip()
    return trimmed.replace("â", "\"")
def tidyup_domain(domain: str) -> str:
    """Normalise a domain as found in peer and block lists.

    Lower-cases the value and removes surrounding whitespace, a trailing
    dot, a port number, the http(s) scheme, a trailing slash, user parts
    ("user@") and "/profile/..." or "/users/..." paths.

    Raises:
        ValueError: When domain is not a str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")

    # All lower-case and strip spaces out + last dot
    domain = domain.lower().strip().rstrip(".")

    # No port number
    domain = re.sub(r":\d+$", "", domain)

    # No protocol, sometimes without the slashes
    domain = re.sub(r"^https?:/*", "", domain)

    # No trailing slash
    domain = re.sub(r"/$", "", domain)

    # No @ sign
    domain = re.sub(r"^@", "", domain)

    # No individual users in block lists
    domain = re.sub(r"(.+)@", "", domain)

    # Use a membership test: str.find() returns -1 (truthy) when nothing is
    # found, which would keep the "/users/" branch from ever running.
    if "/profile/" in domain:
        domain = domain.split("/profile/")[0]
    elif "/users/" in domain:
        domain = domain.split("/users/")[0]

    return domain
def json_from_response(response: requests.models.Response) -> list:
    """Return the decoded JSON body of response.

    An empty list is returned when the body is blank or is not valid JSON.
    The decoded value is whatever the JSON document contains, so it is not
    necessarily a list.

    Raises:
        ValueError: When response is not a requests Response.
    """
    if not isinstance(response, requests.models.Response):
        raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")

    data = list()
    if response.text.strip() != "":
        try:
            data = response.json()
        except ValueError:
            # Deliberate best effort: invalid JSON yields the empty default.
            # ValueError is the common base of the json and simplejson
            # decode errors, whichever one requests is using.
            pass

    return data
def has_key(lists: list, key: str, value: object) -> bool:
    """Return True when a dict in lists maps key to value.

    Rows are checked in order and the search stops at the first match.

    Raises:
        ValueError: When lists is not a list, key is not a non-empty str,
            or a checked row is not a dict.
        KeyError: When a checked row does not contain key.
    """
    if not isinstance(lists, list):
        raise ValueError(f"Parameter lists[]='{type(lists)}' is not 'list'")
    elif not isinstance(key, str):
        raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'")
    elif key == "":
        raise ValueError("Parameter 'key' is empty")

    for row in lists:
        if not isinstance(row, dict):
            raise ValueError(f"row[]='{type(row)}' is not 'dict'")
        elif key not in row:
            raise KeyError(f"Cannot find key='{key}'")
        elif row[key] == value:
            return True

    return False
# Extract domains (and their block reasons) from the <tr> rows of `tag`.
# Rows without a <td> are skipped. The first cell is cleaned with
# tidyup_domain(), the second with tidyup_reason(). Blacklisted and invalid
# domains are skipped with a warning; the combined "gab.com/.ai,
# develop.gab.com" cell is handled as a special case.
# Raises ValueError when `tag` is not a bs4 Tag and KeyError when it has no rows.
# NOTE(review): statements are missing from this listing (the initialisation
# of `domains`, the `continue` lines, most of the appended entries and the
# return) - confirm against the complete file before changing the logic.
760 def find_domains(tag: bs4.element.Tag) -> list:
761 # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!")
762 if not isinstance(tag, bs4.element.Tag):
763 raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")
# NOTE(review): same condition as the `if` above, so this branch can never be reached.
764 elif not isinstance(tag, bs4.element.Tag):
765 raise KeyError("Cannot find table with instances!")
766 elif len(tag.select("tr")) == 0:
767 raise KeyError("No table rows found in table!")
770 for element in tag.select("tr"):
771 # DEBUG: print(f"DEBUG: element[]={type(element)}")
772 if not element.find("td"):
773 # DEBUG: print("DEBUG: Skipping element, no <td> found")
776 domain = tidyup_domain(element.find("td").text)
# Index [1] needs a second <td>; a row with a single cell raises IndexError here.
777 reason = tidyup_reason(element.findAll("td")[1].text)
779 # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
781 if blacklist.is_blacklisted(domain):
782 print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
784 elif domain == "gab.com/.ai, develop.gab.com":
785 # DEBUG: print(f"DEBUG: Multiple domains detected in one row")
795 "domain": "develop.gab.com",
799 elif not validators.domain(domain):
800 print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
803 # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
809 # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
def fetch_url(url: str, headers: dict, timeout: list) -> requests.models.Response:
    """Fetch url through network.fetch_response().

    The URL is split with urlparse(); the host name plus the path (and the
    query string, when present) are handed to network.fetch_response().

    Args:
        url: Absolute URL to fetch.
        headers: HTTP headers to send.
        timeout: Connection and read timeout, as a list or a tuple.

    Raises:
        ValueError: When a parameter has the wrong type or url is empty.
    """
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")
    elif not isinstance(headers, dict):
        raise ValueError(f"Parameter headers[]='{type(headers)}' is not 'dict'")
    elif not isinstance(timeout, (list, tuple)):
        # Tuples must be accepted as well: callers in this module pass
        # (connection_timeout, read_timeout) tuples.
        raise ValueError(f"Parameter timeout[]='{type(timeout)}' is not 'list' or 'tuple'")

    components = urlparse(url)

    # Invoke other function, avoid trailing ?
    if components.query != "":
        response = network.fetch_response(components.hostname, f"{components.path}?{components.query}", headers, timeout)
    else:
        response = network.fetch_response(components.hostname, f"{components.path}", headers, timeout)

    return response