1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
28 from urllib.parse import urlparse
30 from fba import blacklist
32 from fba import config
33 from fba import instances
35 from fba.federation import lemmy
36 from fba.federation import misskey
37 from fba.federation import peertube
# Pending errors that still need to be written to the database, keyed by domain
pending_errors = {}

# "rel" identifiers (no real URLs)
nodeinfo_identifier = [
    "https://nodeinfo.diaspora.software/ns/schema/2.1",
    "https://nodeinfo.diaspora.software/ns/schema/2.0",
    "https://nodeinfo.diaspora.software/ns/schema/1.1",
    "https://nodeinfo.diaspora.software/ns/schema/1.0",
    "http://nodeinfo.diaspora.software/ns/schema/2.1",
    "http://nodeinfo.diaspora.software/ns/schema/2.0",
    "http://nodeinfo.diaspora.software/ns/schema/1.1",
    "http://nodeinfo.diaspora.software/ns/schema/1.0",
]

# HTTP headers for non-API requests
headers = {
    "User-Agent": config.get("useragent"),
}

# HTTP headers for API requests
api_headers = {
    "User-Agent": config.get("useragent"),
    "Content-Type": "application/json",
}

# Database connection and cursor shared by the functions below
connection = sqlite3.connect("blocks.db")
cursor = connection.cursor()

# Pattern instances for version numbers. Raw strings are used so that "\." and
# "\d" reach the regex engine instead of being invalid string escapes.
patterns = [
    # semantic version number (with optional v|V prefix)
    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"),
    # non-semantic, e.g. 1.2.3.4
    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(\.(?P<subpatch>0|[1-9]\d*))?)$"),
    # non-semantic, e.g. 2023.05[-dev]
    re.compile(r"^(?P<year>[1-9]{1}[0-9]{3})\.(?P<month>[0-9]{2})(-dev){0,1}$"),
    # non-semantic, e.g. abcdef0
    re.compile(r"^[a-f0-9]{7}$"),
]
82 ##### Other functions #####
def is_primitive(var: object) -> bool:
    """Return True when *var* is exactly an int, str, float, bool or None.

    Subclasses of those types are not considered primitive (exact type match).
    """
    # Identity check: "== None" would invoke a custom __eq__ and can give a
    # wrong (or non-boolean) answer for objects that override equality.
    return type(var) in {int, str, float, bool} or var is None
def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
    """Register *domain* if needed, fetch its peers and register each new peer.

    Parameters:
        domain:   instance to query, must be a non-empty string
        origin:   value passed to instances.add() for *domain*, may be None
        software: software name handed to fetch_peers()
        script:   non-empty name of the invoking script, passed to instances.add()
        path:     optional value passed to instances.add() for *domain*

    Raises:
        ValueError: when a parameter has the wrong type or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif origin is not None and not isinstance(origin, str):
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif not isinstance(script, str):
        raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
    elif script == "":
        # The original message named 'domain' here although 'script' is checked
        raise ValueError("Parameter 'script' is empty")

    if not instances.is_registered(domain):
        instances.add(domain, origin, script, path)

    peerlist = fetch_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending_instance_data(domain):
        # Flush data collected for this domain while fetching
        instances.update_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
    for instance in peerlist:
        if instance is None:
            # Skip "None" types as tidyup_domain() cannot parse them
            continue

        instance = tidyup_domain(instance)

        if instance == "":
            print("WARNING: Empty instance after tidyup_domain(), domain:", domain)
            continue
        elif not validators.domain(instance.split("/")[0]):
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
            continue
        elif blacklist.is_blacklisted(instance):
            continue

        try:
            if not instances.is_registered(instance):
                instances.add(instance, domain, script)
        except Exception as e:
            # Best effort: one failing peer must not abort the remaining ones
            print(f"ERROR: instance='{instance}',exception[{type(e)}]:'{str(e)}'")
            continue
def add_peers(rows: dict) -> list:
    """Collect tidied, non-blacklisted peers from the "linked", "allowed" and
    "blocked" entries of *rows*.

    Keys that are missing or set to None are skipped.
    """
    peers = list()
    for key in ["linked", "allowed", "blocked"]:
        if key in rows and rows[key] is not None:
            for peer in rows[key]:
                peer = tidyup_domain(peer)

                if blacklist.is_blacklisted(peer):
                    continue

                peers.append(peer)

    return peers
def remove_version(software: str) -> str:
    """Strip a trailing version number from a software string.

    The input is returned untouched when it contains neither "." nor " ",
    when no known separator is found, or when the candidate version does not
    match any entry of the module-level `patterns` list.
    """
    if "." not in software and " " not in software:
        print(f"WARNING: software='{software}' does not contain a version number.")
        return software

    # Cut off anything after the first ";", "," or " - "
    temp = software
    if ";" in software:
        temp = software.split(";")[0]
    elif "," in software:
        temp = software.split(",")[0]
    elif " - " in software:
        temp = software.split(" - ")[0]

    # The separators are looked up in 'temp', the string that is actually
    # split. Testing 'software' instead could pick a separator that only
    # exists in the part that was cut off above.
    if " " in temp:
        version = temp.split(" ")[-1]
    elif "/" in temp:
        version = temp.split("/")[-1]
    elif "-" in temp:
        version = temp.split("-")[-1]
    else:
        # No common separator found
        return software

    match = None
    for pattern in patterns:
        match = pattern.match(version)
        if match is not None:
            break

    if match is None:
        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
        return software

    # Drop the version and the single separator character in front of it
    end = len(temp) - len(version) - 1

    software = temp[0:end].strip()
    if " version" in software:
        software = strip_until(software, " version")

    return software
def strip_powered_by(software: str) -> str:
    """Return what follows "powered by" in *software*, cut at the first " - ".

    The input is returned unchanged (with a warning) when it does not contain
    "powered by". An empty string is returned when nothing follows the marker.

    Raises:
        ValueError: when *software* is empty
    """
    marker = "powered by"

    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise ValueError("Parameter 'software' is empty")
    elif marker not in software:
        print(f"WARNING: Cannot find 'powered by' in '{software}'!")
        return software

    # Search for the same marker that was tested above. Searching for
    # "powered by " (trailing space) could return -1 and slice garbage.
    start = software.find(marker)
    software = software[start + len(marker):].strip()

    if software == "":
        # Nothing follows the marker; strip_until() rejects empty input
        return software

    return strip_until(software, " - ")
def strip_hosted_on(software: str) -> str:
    """Return what precedes "hosted on" in *software*, cut at the first " - ".

    The input is returned unchanged (with a warning) when it does not contain
    "hosted on". An empty string is returned when nothing precedes the marker.

    Raises:
        ValueError: when *software* is empty
    """
    marker = "hosted on"

    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise ValueError("Parameter 'software' is empty")
    elif marker not in software:
        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
        return software

    end = software.find(marker)

    # The original used software[0, start]: an undefined name and a tuple
    # index instead of a slice, so this path always raised.
    software = software[0:end].strip()

    if software == "":
        # Nothing precedes the marker; strip_until() rejects empty input
        return software

    return strip_until(software, " - ")
def strip_until(software: str, until: str) -> str:
    """Return the part of *software* before the first occurrence of *until*.

    The input is returned unchanged (with a warning) when *until* does not
    occur in it.

    Raises:
        ValueError: when *software* or *until* is empty
    """
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise ValueError("Parameter 'software' is empty")
    elif until == "":
        print("ERROR: Bad method call, 'until' is empty")
        raise ValueError("Parameter 'until' is empty")
    elif until not in software:
        print(f"WARNING: Cannot find '{until}' in '{software}'!")
        return software

    # Next, strip until part
    end = software.find(until)

    # NOTE(review): the guard on this line was not visible in the reviewed
    # text; "end > 0" (leave the string alone when it starts with 'until')
    # is assumed - please confirm.
    if end > 0:
        software = software[0:end].strip()

    return software
def remove_pending_error(domain: str):
    """Drop any pending error recorded for *domain*.

    A domain without a pending error is silently ignored.

    Raises:
        ValueError: when *domain* is not a string or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Prevent updating any pending errors, nodeinfo was found.
    # pop() with a default tolerates a missing key without a catch-all except.
    pending_errors.pop(domain, None)
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoded *domain*.

    Raises:
        ValueError: when *domain* is not a string or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    return hashlib.sha256(domain.encode("utf-8")).hexdigest()
def log_error(domain: str, response: requests.models.Response):
    """Write an error for *domain* to the error_log table and purge old rows.

    *response* may be an exception, a plain string or a response object.
    Exceptions and strings are stored with error code 999; otherwise the
    status code of the response is stored.

    Raises:
        ValueError: when *domain* is not a string or is empty
        SystemExit: with code 255 when a database statement fails
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    try:
        # JSONDecodeError is a subclass of BaseException, one check suffices
        if isinstance(response, BaseException):
            response = str(response)

        # NOTE(review): the parameter lists of both INSERT statements were
        # only partly visible in the reviewed text; they are rebuilt from the
        # column lists (response.reason is assumed for error_message).
        if isinstance(response, str):
            cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)", [
                domain,
                response,
                time.time(),
            ])
        else:
            cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)", [
                domain,
                response.status_code,
                response.reason,
                time.time(),
            ])

        # Cleanup old entries
        cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")])
    except Exception as e:
        print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
        # NOTE(review): terminating with exit code 255 is assumed from the
        # surrounding code; the original line was not visible.
        raise SystemExit(255) from e
def fetch_peers(domain: str, software: str) -> list:
    """Fetch the list of peers known to *domain*.

    misskey, lemmy and peertube are delegated to their own modules. For any
    other software "/api/v1/instance/peers" is tried first, then
    "/api/v3/site" with its "federated_instances" entry.

    Returns:
        the peers found; an empty list when nothing could be fetched

    Raises:
        ValueError: when a parameter has the wrong type or *domain* is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif software is not None and not isinstance(software, str):
        raise ValueError(f"software[]={type(software)} is not 'str'")

    if software == "misskey":
        return misskey.fetch_peers(domain)
    elif software == "lemmy":
        return lemmy.fetch_peers(domain)
    elif software == "peertube":
        return peertube.fetch_peers(domain)

    timeout = (config.get("connection_timeout"), config.get("read_timeout"))
    peers = list()
    try:
        response = get_response(domain, "/api/v1/instance/peers", api_headers, timeout)
        data = json_from_response(response)

        if not response.ok or response.status_code >= 400:
            # Was not able to fetch peers, trying alternative
            response = get_response(domain, "/api/v3/site", api_headers, timeout)
            data = json_from_response(response)

            if not response.ok or response.status_code >= 400:
                print("WARNING: Could not reach any JSON API:", domain)
                instances.update_last_error(domain, response)
            elif isinstance(data, dict) and "federated_instances" in data:
                peers = peers + add_peers(data["federated_instances"])
            else:
                # NOTE(review): the original handling of a list response here
                # was not visible in the reviewed text; it is treated like any
                # other unusable answer.
                print("WARNING: JSON response does not contain 'federated_instances':", domain)
                instances.update_last_error(domain, response)
        elif isinstance(data, list):
            # Querying API was successful
            peers = data
        else:
            # Do not hand a non-list (e.g. an error document) to callers that
            # iterate over peers
            print("WARNING: JSON response is not a list of peers:", domain)
            instances.update_last_error(domain, response)

    except Exception as e:
        # Best effort: record the error, return what was collected so far.
        # Catching Exception (not BaseException) keeps Ctrl+C working.
        print("WARNING: Some error during get():", domain, e)
        instances.update_last_error(domain, e)

    instances.set("total_peers", domain, len(peers))
    instances.update_last_instance_fetch(domain)

    return peers
def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = None) -> dict:
    """POST *parameter* to https://<domain><path> and return the decoded JSON.

    Parameters:
        domain:        host to contact, non-empty string
        path:          request path, non-empty string
        parameter:     request body
        extra_headers: optional headers merged over the API headers

    Returns:
        the decoded response; an empty dict when the request itself failed

    Raises:
        ValueError: when a parameter has the wrong type or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' cannot be empty")
    elif not isinstance(parameter, str):
        raise ValueError(f"parameter[]={type(parameter)} is not 'str'")

    # A None default avoids one dict object being shared between all calls
    if extra_headers is None:
        extra_headers = {}

    data = {}
    try:
        response = reqto.post(
            f"https://{domain}{path}",
            data=parameter,
            headers={**api_headers, **extra_headers},
            timeout=(config.get("connection_timeout"), config.get("read_timeout"))
        )

        data = json_from_response(response)
        if not response.ok or response.status_code >= 400:
            print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',parameter()={len(parameter)},response.status_code='{response.status_code}',data[]='{type(data)}'")
            instances.update_last_error(domain, response)

    except Exception as e:
        print(f"WARNING: Some error during post(): domain='{domain}',path='{path}',parameter()={len(parameter)},exception[{type(e)}]:'{str(e)}'")

    return data
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch the nodeinfo document of *domain*.

    Auto-discovery through fetch_wellknown_nodeinfo() is tried first. If it
    yields nothing, a list of static paths is queried. When *path* is given,
    only the matching static path is queried.

    Returns:
        the decoded JSON of the last request made (empty dict if none)

    Raises:
        ValueError: when a parameter has the wrong type or *domain* is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif path is not None and not isinstance(path, str):
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    nodeinfo = fetch_wellknown_nodeinfo(domain)

    if len(nodeinfo) > 0:
        return nodeinfo

    # NOTE(review): only the two ".json" entries were visible in the reviewed
    # text; the remaining entries are assumed - please confirm.
    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0",
        "/api/v1/instance",
    ]

    timeout = (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
    data = {}
    for request in request_paths:
        # Compare against the candidate 'request', as a bare path or as an
        # absolute URL. The original compared 'path' with a string built from
        # 'path' itself, which never matched and skipped every request.
        if path not in (None, "", request, f"https://{domain}{request}"):
            continue

        try:
            response = get_response(domain, request, api_headers, timeout)
            data = json_from_response(response)

            if response.ok and isinstance(data, dict):
                instances.set("detection_mode", domain, "STATIC_CHECK")
                instances.set("nodeinfo_url", domain, request)
                break
            elif response.ok and isinstance(data, list):
                # NOTE(review): the line following this message was not
                # visible; the unsupported answer is recorded and the next
                # path is tried.
                print(f"UNSUPPORTED: domain='{domain}' returned a list: '{data}'")
                instances.update_last_error(domain, response)
                continue
            elif not response.ok or response.status_code >= 400:
                print("WARNING: Failed fetching nodeinfo from domain:", domain)
                instances.update_last_error(domain, response)
                continue

        except Exception as e:
            # Best effort: remember the error and try the next path
            instances.update_last_error(domain, e)

    return data
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Discover and fetch nodeinfo through "/.well-known/nodeinfo".

    Each entry of "links" whose "rel" is a known nodeinfo identifier is
    fetched until one returns a JSON object.

    Returns:
        the decoded JSON of the last request made (empty dict if none).
        NOTE(review): when no link yields a document this is the well-known
        document itself - confirm callers expect that.

    Raises:
        ValueError: when *domain* is not a string or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    data = {}

    try:
        response = get_response(domain, "/.well-known/nodeinfo", api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))

        data = json_from_response(response)
        if response.ok and isinstance(data, dict):
            nodeinfo = data
            if "links" in nodeinfo:
                for link in nodeinfo["links"]:
                    if link["rel"] in nodeinfo_identifier:
                        response = get_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout")))

                        data = json_from_response(response)
                        if response.ok and isinstance(data, dict):
                            instances.set("detection_mode", domain, "AUTO_DISCOVERY")
                            instances.set("nodeinfo_url", domain, link["href"])
                            break
                    else:
                        print("WARNING: Unknown 'rel' value:", domain, link["rel"])
            else:
                print("WARNING: nodeinfo does not contain 'links':", domain)

    except Exception as e:
        # Best effort: record the error and return what was decoded so far
        print("WARNING: Failed fetching .well-known info:", domain)
        instances.update_last_error(domain, e)

    return data
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Guess the software of *domain* from the HTML page at *path*.

    <meta name="generator"> is preferred, <meta property="og:site_name"> is
    the fallback. Version numbers and "powered by" / "hosted on" / " by " /
    " see " decorations are stripped from the value found.

    Returns:
        the software name, or None when nothing usable was found

    Raises:
        ValueError: when a parameter has the wrong type or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        # The original message named 'domain' here although 'path' is checked
        raise ValueError("Parameter 'path' is empty")

    software = None

    try:
        response = get_response(domain, path, headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.status_code < 300 and len(response.text) > 0:
            doc = bs4.BeautifulSoup(response.text, "html.parser")

            generator = doc.find("meta", {"name": "generator"})
            site_name = doc.find("meta", {"property": "og:site_name"})

            if isinstance(generator, bs4.element.Tag):
                software = tidyup_domain(generator.get("content"))
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set("detection_mode", domain, "GENERATOR")
                remove_pending_error(domain)
            elif isinstance(site_name, bs4.element.Tag):
                # The original assigned to the misspelled name 'sofware', so
                # the og:site_name value was never used.
                software = tidyup_domain(site_name.get("content"))
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set("detection_mode", domain, "SITE_NAME")
                remove_pending_error(domain)

    except Exception as e:
        # Best effort: record the error, software stays as detected so far
        instances.update_last_error(domain, e)

    if isinstance(software, str) and software == "":
        # Corrected empty string to None
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        software = remove_version(software)

    if isinstance(software, str) and " powered by " in software:
        software = remove_version(strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        software = remove_version(strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        software = strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        software = strip_until(software, " see ")

    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine which software *domain* runs.

    The nodeinfo document is consulted first. Whenever it is missing, is an
    error or has no software name, the generator detection on "/" is used
    instead. Known forks are mapped to their upstream name.

    Returns:
        the software name, or None when it could not be determined

    Raises:
        ValueError: when a parameter has the wrong type or *domain* is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif path is not None and not isinstance(path, str):
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    data = fetch_nodeinfo(domain, path)

    if not isinstance(data, dict) or len(data) == 0:
        # Could not determine software type from nodeinfo
        return fetch_generator_from_path(domain)

    if "status" in data and data["status"] == "error" and "message" in data:
        print("WARNING: JSON response is an error:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        print("WARNING: JSON response contains only a message:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "software" not in data or "name" not in data["software"]:
        return fetch_generator_from_path(domain)

    software = tidyup_domain(data["software"]["name"])

    if software in ["akkoma", "rebased"]:
        software = "pleroma"
    elif software in ["hometown", "ecko"]:
        software = "mastodon"
    elif software in ["calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
        software = "misskey"
    elif software.find("/") > 0:
        print("WARNING: Spliting of slash:", software)
        # The original called the undefined name 'tidup_domain' here
        software = tidyup_domain(software.split("/")[-1])
    elif software.find("|") > 0:
        print("WARNING: Spliting of pipe:", software)
        software = tidyup_domain(software.split("|")[0])
    elif "powered by" in software:
        software = strip_powered_by(software)
    elif " by " in software:
        software = strip_until(software, " by ")
    elif " see " in software:
        software = strip_until(software, " see ")

    if software == "":
        # The original replaced the empty name by None and then evaluated
        # '"." in software', raising TypeError before its generator fallback
        # could run. Use the fallback directly.
        print("WARNING: tidyup_domain() left no software name behind:", domain)
        software = fetch_generator_from_path(domain)
    elif "." in software or " " in software:
        software = remove_version(software)

    if isinstance(software, str) and "powered by" in software:
        software = remove_version(strip_powered_by(software))

    return software
def send_bot_post(instance: str, blocklist: dict):
    """Post a status listing the instances blocked by *instance*.

    Parameters:
        instance:  blocking instance, non-empty string
        blocklist: sequence of dicts with the keys "blocked" and "reason";
                   only the first 20 entries are posted

    Raises:
        ValueError: when a parameter has the wrong type or *instance* is empty
    """
    # The original validated the undefined name 'domain' and demanded a dict,
    # although the list is sliced and iterated below.
    if not isinstance(instance, str):
        raise ValueError(f"Parameter instance[]='{type(instance)}' is not 'str'")
    elif instance == "":
        raise ValueError("Parameter 'instance' is empty")
    elif not isinstance(blocklist, (list, tuple)):
        raise ValueError(f"Parameter blocklist[]='{type(blocklist)}' is not 'list'")

    truncated = len(blocklist) > 20

    parts = [instance + " has blocked the following instances:\n\n"]

    # [0:20] keeps the 20 entries the notice below promises ([0:19] kept 19)
    for block in blocklist[0:20]:
        # Work on a local copy so the caller's entries are not modified
        reason = block["reason"]
        if reason is None or reason == "":
            parts.append(block["blocked"] + " with unspecified reason\n")
        else:
            if len(reason) > 420:
                reason = reason[0:419] + "[…]"

            # A zero-width space after "@" keeps the text from forming mentions
            parts.append(block["blocked"] + ' for "' + reason.replace("@", "@\u200b") + '"\n')

    if truncated:
        parts.append("(the list has been truncated to the first 20 entries)")

    message = "".join(parts)

    botheaders = {**api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}}

    # NOTE(review): the "status" key, the timeout and the return value were
    # not visible in the reviewed text and are assumed - please confirm.
    reqto.post(
        f"{config.get('bot_instance')}/api/v1/statuses",
        data={
            "status": message,
            "visibility": config.get('bot_visibility'),
            "content_type": "text/plain"
        },
        headers=botheaders,
        timeout=(config.get("connection_timeout"), config.get("read_timeout"))
    )

    return True
def fetch_friendica_blocks(domain: str) -> dict:
    """Scrape the block list from the "/friendica" page of *domain*.

    Returns:
        a dict with the blocked entries ("domain" and "reason" each); an
        empty dict when the page could not be fetched or has no block list.
        NOTE(review): the return statements were not visible in the reviewed
        text; the "reject" key and the empty-dict results are assumed.

    Raises:
        ValueError: when *domain* is not a string or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    blocked = list()

    try:
        doc = bs4.BeautifulSoup(
            get_response(domain, "/friendica", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
    except Exception as e:
        print("WARNING: Failed to fetch /friendica from domain:", domain, e)
        instances.update_last_error(domain, e)
        return {}

    blocklist = doc.find(id="about_blocklist")

    # Prevents exceptions:
    if blocklist is None:
        # Instance has no block list
        return {}

    table = blocklist.find("table")
    if table is None:
        # Block list section without a table: nothing to parse
        return {}

    body = table.find("tbody")
    rows = (body if body else table).find_all("tr")

    for line in rows:
        cells = line.find_all("td")
        if len(cells) < 2:
            # Header rows (<th>) or malformed rows carry no domain/reason pair
            continue

        blocked.append({
            "domain": tidyup_domain(cells[0].text),
            "reason": tidyup_reason(cells[1].text)
        })

    return {
        "reject": blocked
    }
def tidyup_reason(reason: str) -> str:
    """Strip surrounding whitespace from *reason* and replace "â" by a double quote.

    Raises:
        ValueError: when *reason* is not a string
    """
    if not isinstance(reason, str):
        raise ValueError(f"Parameter reason[]={type(reason)} is not 'str'")

    # Strip string
    reason = reason.strip()

    # Replace â with " (a literal replacement, no regular expression needed)
    reason = reason.replace("â", "\"")

    return reason
def tidyup_domain(domain: str) -> str:
    """Normalise a domain string taken from peer or block lists.

    The value is lower-cased and stripped; a trailing dot, a trailing port
    number, the http(s) protocol, a trailing slash, a leading "@", a user part
    ("user@") and anything from "/profile/" or "/users/" on are removed.

    Raises:
        ValueError: when *domain* is not a string
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")

    # All lower-case and strip spaces out + last dot
    domain = domain.lower().strip().rstrip(".")

    # No port number
    domain = re.sub(r":\d+$", "", domain)

    # No protocol, sometimes without the slashes
    domain = re.sub(r"^https?:(/*)", "", domain)

    # No trailing slash
    domain = re.sub(r"/$", "", domain)

    # No @ sign
    domain = re.sub(r"^@", "", domain)

    # No individual users in block lists.
    # str.find() returns -1 (truthy) when nothing is found, so using it as a
    # condition made the "/users/" branch unreachable in practice; test
    # membership instead.
    domain = re.sub(r"(.+)@", "", domain)
    if "/profile/" in domain:
        domain = domain.split("/profile/")[0]
    elif "/users/" in domain:
        domain = domain.split("/users/")[0]

    return domain
def json_from_response(response: requests.models.Response) -> list:
    """Decode the JSON body of *response*.

    Returns:
        the decoded JSON value; an empty list when the body is blank or is
        not valid JSON

    Raises:
        ValueError: when *response* is not a requests Response
    """
    if not isinstance(response, requests.models.Response):
        raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")

    data = list()
    if response.text.strip() != "":
        try:
            data = response.json()
        except ValueError:
            # Both the stdlib JSONDecodeError and the decode error raised by
            # requests are ValueError subclasses, whichever JSON backend
            # requests uses. Invalid JSON is treated like an empty body.
            pass

    return data
def get_response(domain: str, path: str, headers: dict, timeout: list) -> requests.models.Response:
    """Send a GET request to https://<domain><path>.

    Parameters:
        domain:  host to contact, non-empty string
        path:    request path, non-empty string
        headers: HTTP headers to send
        timeout: timeout value handed to reqto.get()

    Raises:
        ValueError: when *domain* or *path* has the wrong type or is empty
        requests.exceptions.ConnectionError: re-raised after the error was
            recorded for *domain*
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    try:
        response = reqto.get(
            f"https://{domain}{path}",
            headers=headers,
            timeout=timeout
        )
    except requests.exceptions.ConnectionError as e:
        instances.update_last_error(domain, e)
        # Bare raise keeps the original traceback intact
        raise

    return response
def has_key(keys: list, search: str, value: object) -> bool:
    """Return True when a dict in *keys* maps *search* to *value*.

    Entries are checked in order and checking stops at the first match.

    Raises:
        ValueError: when *keys* is not a list, *search* is not a non-empty
            string, or a checked entry is not a dict
        KeyError: when a checked entry does not contain *search*
    """
    if not isinstance(keys, list):
        raise ValueError(f"Parameter keys[]='{type(keys)}' is not 'list'")
    elif not isinstance(search, str):
        raise ValueError(f"Parameter search[]='{type(search)}' is not 'str'")
    elif search == "":
        raise ValueError("Parameter 'search' is empty")

    for key in keys:
        if not isinstance(key, dict):
            raise ValueError(f"key[]='{type(key)}' is not 'dict'")
        elif search not in key:
            raise KeyError(f"Cannot find search='{search}'")
        elif key[search] == value:
            return True

    return False
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract domain/reason pairs from the table rows inside *tag*.

    The first <td> of a row is the domain, the second the reason. Rows
    without <td>, blacklisted domains and invalid domains are skipped.

    Returns:
        a list of dicts with the keys "domain" and "reason"

    Raises:
        ValueError: when *tag* is not a bs4 Tag
        KeyError: when *tag* contains no table rows
    """
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")

    # Select once; the original repeated the isinstance() test in a second,
    # unreachable branch and queried the rows twice.
    rows = tag.select("tr")
    if len(rows) == 0:
        raise KeyError("No table rows found in table!")

    domains = list()
    for element in rows:
        cells = element.find_all("td")
        if not cells:
            # Skipping element, no <td> found
            continue

        domain = tidyup_domain(cells[0].text)
        # A row with a single cell has no reason column; the original raised
        # IndexError here
        reason = tidyup_reason(cells[1].text) if len(cells) > 1 else ""

        if blacklist.is_blacklisted(domain):
            print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
            continue
        elif domain == "gab.com/.ai, develop.gab.com":
            # Multiple domains detected in one row.
            # NOTE(review): only "develop.gab.com" was visible in the reviewed
            # text; "gab.com" and "gab.ai" are derived from the matched
            # string - please confirm.
            for name in ("gab.com", "gab.ai", "develop.gab.com"):
                domains.append({
                    "domain": name,
                    "reason": reason,
                })
            continue
        elif not validators.domain(domain):
            print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
            continue

        domains.append({
            "domain": domain,
            "reason": reason,
        })

    return domains
def get_url(url: str, headers: dict, timeout: list) -> requests.models.Response:
    """Fetch *url* by splitting it into host and path and calling get_response().

    Only the host name, path and query of *url* are used; get_response()
    builds the final request.

    Raises:
        ValueError: when *url* is not a string or is empty
    """
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")

    components = urlparse(url)

    # get_response() rejects an empty path, so a bare "https://host" URL is
    # requested as "/"
    target = components.path or "/"

    # Invoke other function, avoid trailing ?
    if components.query != "":
        target = f"{target}?{components.query}"

    return get_response(components.hostname, target, headers, timeout)