1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
28 from urllib.parse import urlparse
31 from fba import config
32 from fba import instances
34 # Don't check these, known trolls/flooders/testing/developing
36 # Floods network with fake nodes as "research" project
37 "activitypub-troll.cf",
43 "social.shrimpcam.pw",
45 "mastotroll.netz.org",
46 # Testing/developing installations
49 "misskeytest.chn.moe",
52 # Array with pending errors needed to be written to database
56 # "rel" identifiers (no real URLs)
57 nodeinfo_identifier = [
58 "https://nodeinfo.diaspora.software/ns/schema/2.1",
59 "https://nodeinfo.diaspora.software/ns/schema/2.0",
60 "https://nodeinfo.diaspora.software/ns/schema/1.1",
61 "https://nodeinfo.diaspora.software/ns/schema/1.0",
62 "http://nodeinfo.diaspora.software/ns/schema/2.1",
63 "http://nodeinfo.diaspora.software/ns/schema/2.0",
64 "http://nodeinfo.diaspora.software/ns/schema/1.1",
65 "http://nodeinfo.diaspora.software/ns/schema/1.0",
68 # HTTP headers for non-API requests
70 "User-Agent": config.get("useragent"),
73 # HTTP headers for API requests
75 "User-Agent": config.get("useragent"),
76 "Content-Type": "application/json",
81 "Silenced instances" : "Silenced servers",
82 "Suspended instances" : "Suspended servers",
83 "Limited instances" : "Limited servers",
# Mapping German -> English
85 "Gesperrte Server" : "Suspended servers",
86 "Gefilterte Medien" : "Filtered media",
87 "Stummgeschaltete Server" : "Silenced servers",
89 "停止済みのサーバー" : "Suspended servers",
90 "制限中のサーバー" : "Limited servers",
91 "メディアを拒否しているサーバー": "Filtered media",
92 "サイレンス済みのサーバー" : "Silenced servers",
94 "שרתים מושעים" : "Suspended servers",
95 "מדיה מסוננת" : "Filtered media",
96 "שרתים מוגבלים" : "Silenced servers",
98 "Serveurs suspendus" : "Suspended servers",
99 "Médias filtrés" : "Filtered media",
100 "Serveurs limités" : "Limited servers",
101 "Serveurs modérés" : "Limited servers",
104 # URL for fetching peers
105 get_peers_url = "/api/v1/instance/peers"
107 # Connect to database
108 connection = sqlite3.connect("blocks.db")
109 cursor = connection.cursor()
111 # Pattern instance for version numbers
# semantic version number (with optional v|V prefix)
114 re.compile("^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"),
# non-semantic, e.g. 1.2.3.4
116 re.compile("^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(\.(?P<subpatch>0|[1-9]\d*))?)$"),
# non-semantic, e.g. 2023.05[-dev]
118 re.compile("^(?P<year>[1-9]{1}[0-9]{3})\.(?P<month>[0-9]{2})(-dev){0,1}$"),
119 # non-semantic, e.g. abcdef0
120 re.compile("^[a-f0-9]{7}$"),
123 ##### Other functions #####
def is_primitive(var: object) -> bool:
    """Tell whether ``var`` is a plain scalar value.

    Returns True when the exact type of ``var`` is int, str, float or bool,
    or when ``var`` is None. Subclasses of those types and every other
    object yield False.
    """
    # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!")
    # Identity check for None: '== None' would call a user-defined __eq__()
    # and could report arbitrary objects as primitive.
    return var is None or type(var) in {int, str, float, bool}
def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
    """Register ``domain`` and all peers it reports.

    ``domain`` is added through add_instance() when is_instance_registered()
    does not know it yet. Its peer list is then fetched with get_peers() and
    every usable peer that is not yet registered is added with ``domain`` as
    its origin.

    Parameters:
        domain:   Domain to fetch peers from, must be a non-empty string
        origin:   Domain that led to ``domain`` or None
        software: Software name handed to get_peers()
        script:   Name of the invoking script, must be a non-empty string
        path:     Optional path handed to add_instance() for ``domain``

    Raises:
        ValueError: If a parameter has the wrong type or is empty
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif origin is not None and not isinstance(origin, str):
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif not isinstance(script, str):
        raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
    elif script == "":
        # This guard used to repeat the 'domain' check and was unreachable
        raise ValueError("Parameter 'script' cannot be empty")

    if not is_instance_registered(domain):
        # DEBUG: print("DEBUG: Adding new domain:", domain, origin)
        add_instance(domain, origin, script, path)

    # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software)
    peerlist = get_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending_instance_data(domain):
        # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...")
        instances.update_instance_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
    for instance in peerlist:
        if instance is None:
            # Skip "None" types as tidyup_domain() cannot parse them
            continue

        # DEBUG: print(f"DEBUG: instance='{instance}' - BEFORE")
        instance = tidyup_domain(instance)
        # DEBUG: print(f"DEBUG: instance='{instance}' - AFTER")

        if instance == "":
            print("WARNING: Empty instance after tidyup_domain(), domain:", domain)
            continue
        elif not validators.domain(instance.split("/")[0]):
            # Only the part before the first slash is validated as a domain
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
            continue
        elif is_blacklisted(instance):
            # DEBUG: print("DEBUG: instance is blacklisted:", instance)
            continue

        # DEBUG: print("DEBUG: Handling instance:", instance)
        try:
            if not is_instance_registered(instance):
                # DEBUG: print("DEBUG: Adding new instance:", instance, domain)
                add_instance(instance, domain, script)
        except Exception as e:
            # One bad peer must not abort the loop, but KeyboardInterrupt and
            # SystemExit are deliberately no longer swallowed here.
            print(f"ERROR: instance='{instance}',exception[{type(e)}]:'{str(e)}'")
            continue

    # DEBUG: print("DEBUG: EXIT!")
def add_peers(rows: dict) -> list:
    """Collect all non-blacklisted peers found in ``rows``.

    The keys "linked", "allowed" and "blocked" are inspected in this order.
    Keys that are missing or hold None are ignored. Each peer is cleaned with
    tidyup_domain() and dropped when is_blacklisted() reports it.

    Parameters:
        rows: Dictionary that may contain lists under the three keys above

    Returns:
        A new list with the cleaned peers, duplicates are not removed
    """
    # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!")
    peers = []
    for key in ("linked", "allowed", "blocked"):
        # DEBUG: print(f"DEBUG: Checking key='{key}'")
        entries = rows.get(key)
        if entries is None:
            continue

        # DEBUG: print(f"DEBUG: Adding {len(entries)} peer(s) to peers list ...")
        for peer in entries:
            if peer is None:
                # Nothing to clean up or check for a missing value
                continue

            # DEBUG: print(f"DEBUG: peer='{peer}' - BEFORE!")
            peer = tidyup_domain(peer)
            # DEBUG: print(f"DEBUG: peer='{peer}' - AFTER!")

            if not isinstance(peer, str) or peer == "":
                # is_blacklisted() raises ValueError for such values, which
                # would otherwise abort the collection of all other peers
                continue
            elif is_blacklisted(peer):
                # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted, skipped!")
                continue

            # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...")
            peers.append(peer)

    # DEBUG: print(f"DEBUG: peers()={len(peers)} - EXIT!")
    return peers
def remove_version(software: str) -> str:
    """Remove a trailing version number from a software string.

    Processing steps:
      1. Strings without "." and without " " are returned untouched.
      2. Everything from the first ";", "," or " - " on is cut off.
      3. The last token after " ", "/" or "-" is taken as version candidate.
      4. The candidate must match one of the module-level ``patterns``,
         otherwise the original string is returned untouched.
      5. The candidate plus its one-character separator is removed and a
         remaining " version" suffix is stripped with strip_until().

    Parameters:
        software: Raw software string, e.g. taken from a generator meta tag

    Returns:
        The software name without version, or ``software`` unchanged
    """
    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    if "." not in software and " " not in software:
        print(f"WARNING: software='{software}' does not contain a version number.")
        return software

    # Cut off trailing remarks first
    temp = software
    if ";" in software:
        temp = software.split(";")[0]
    elif "," in software:
        temp = software.split(",")[0]
    elif " - " in software:
        temp = software.split(" - ")[0]

    # DEBUG: print(f"DEBUG: software='{software}'")
    # The separator has to be looked up in 'temp' as that is the string being
    # split. Looking it up in 'software' picked a separator that only exists
    # in the part which was cut off above.
    version = None
    if " " in temp:
        version = temp.split(" ")[-1]
    elif "/" in temp:
        version = temp.split("/")[-1]
    elif "-" in temp:
        version = temp.split("-")[-1]
    else:
        # DEBUG: print(f"DEBUG: Was not able to find common separator, returning untouched software='{software}'")
        return software

    # DEBUG: print(f"DEBUG: Checking {len(patterns)} patterns ...")
    match = None
    for pattern in patterns:
        match = pattern.match(version)

        # DEBUG: print(f"DEBUG: match[]={type(match)}")
        if match is not None:
            break

    # DEBUG: print(f"DEBUG: version[{type(version)}]='{version}',match='{match}'")
    if match is None:
        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
        return software

    # DEBUG: print(f"DEBUG: Found valid version number: '{version}', removing it ...")
    # All supported separators are one character long, hence the "- 1"
    end = len(temp) - len(version) - 1

    # DEBUG: print(f"DEBUG: end[{type(end)}]={end}")
    software = temp[0:end].strip()
    if " version" in software:
        # DEBUG: print(f"DEBUG: software='{software}' contains word ' version'")
        software = strip_until(software, " version")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def strip_powered_by(software: str) -> str:
    """Return what follows "powered by " in ``software``.

    The text after the marker is trimmed and then handed to strip_until()
    to cut off anything from " - " on.

    Parameters:
        software: Non-empty software string

    Returns:
        The stripped software name, or ``software`` unchanged when the
        marker is not found

    Raises:
        ValueError: If ``software`` is empty
    """
    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    marker = "powered by "
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise ValueError("Parameter 'software' is empty")

    # One lookup with one marker: the former guard tested "powered by"
    # (no trailing space) while find() used "powered by ", so a -1 result
    # could be used as slice offset.
    start = software.find(marker)
    # DEBUG: print(f"DEBUG: start[{type(start)}]='{start}'")
    if start < 0:
        print(f"WARNING: Cannot find 'powered by' in '{software}'!")
        return software

    software = software[start + len(marker):].strip()
    # DEBUG: print(f"DEBUG: software='{software}'")

    software = strip_until(software, " - ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def strip_hosted_on(software: str) -> str:
    """Return what precedes "hosted on " in ``software``.

    The text before the marker is trimmed and then handed to strip_until()
    to cut off anything from " - " on.

    Parameters:
        software: Non-empty software string

    Returns:
        The stripped software name, or ``software`` unchanged when the
        marker is not found

    Raises:
        ValueError: If ``software`` is empty
    """
    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
    marker = "hosted on "
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise ValueError("Parameter 'software' is empty")

    end = software.find(marker)
    # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
    if end < 0:
        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
        return software

    # Was "software[0, start]": a tuple index using an undefined name
    software = software[0:end].strip()
    # DEBUG: print(f"DEBUG: software='{software}'")

    software = strip_until(software, " - ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def strip_until(software: str, until: str) -> str:
    """Cut ``software`` at the first occurrence of ``until``.

    Parameters:
        software: Non-empty string to shorten
        until:    Non-empty marker, it and everything after it is removed

    Returns:
        The part before the marker with surrounding whitespace removed, or
        ``software`` unchanged when the marker does not occur

    Raises:
        ValueError: If ``software`` or ``until`` is empty
    """
    # DEBUG: print(f"DEBUG: software='{software}',until='{until}' - CALLED!")
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise ValueError("Parameter 'software' is empty")
    elif until == "":
        print("ERROR: Bad method call, 'until' is empty")
        raise ValueError("Parameter 'until' is empty")

    # partition() finds and splits in one pass; an empty separator in the
    # result means the marker was not found.
    head, found, _ = software.partition(until)
    if found == "":
        print(f"WARNING: Cannot find '{until}' in '{software}'!")
        return software

    software = head.strip()

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
325 def is_blacklisted(domain: str) -> bool:
326 if type(domain) != str:
327 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
329 raise ValueError(f"Parameter 'domain' cannot be empty")
332 for peer in blacklist:
def remove_pending_error(domain: str):
    """Drop the entry for ``domain`` from the module-level pending_errors.

    A domain without a pending entry is silently ignored.

    Parameters:
        domain: Non-empty domain name

    Raises:
        ValueError: If ``domain`` is not a string or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    # Prevent updating any pending errors, nodeinfo was found.
    # pop() with a default only tolerates the missing-key case instead of
    # hiding every possible error around a "del" statement.
    pending_errors.pop(domain, None)

    # DEBUG: print("DEBUG: EXIT!")
def get_hash(domain: str) -> str:
    """Return the SHA-256 hex digest of the UTF-8 encoded ``domain``.

    Parameters:
        domain: Non-empty domain name

    Returns:
        64 lower-case hexadecimal characters

    Raises:
        ValueError: If ``domain`` is not a string or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    return hashlib.sha256(domain.encode("utf-8")).hexdigest()
def update_last_blocked(domain: str):
    """Set "last_blocked" of ``domain`` to the current time and flush it.

    The value is stored with instances.set() and written out right away
    through instances.update_instance_data().

    Parameters:
        domain: Non-empty domain name

    Raises:
        ValueError: If ``domain`` is not a string or is empty
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    # DEBUG: print("DEBUG: Updating last_blocked for domain", domain)
    instances.set("last_blocked", domain, time.time())

    # Running pending updates
    # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...")
    instances.update_instance_data(domain)

    # DEBUG: print("DEBUG: EXIT!")
376 def log_error(domain: str, response: requests.models.Response):
377 # DEBUG: print("DEBUG: domain,response[]:", domain, type(response))
378 if type(domain) != str:
379 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
381 raise ValueError(f"Parameter 'domain' cannot be empty")
384 # DEBUG: print("DEBUG: BEFORE response[]:", type(response))
385 if isinstance(response, BaseException) or isinstance(response, json.decoder.JSONDecodeError):
386 response = str(response)
388 # DEBUG: print("DEBUG: AFTER response[]:", type(response))
389 if type(response) is str:
390 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[
396 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[
398 response.status_code,
403 # Cleanup old entries
404 # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})")
405 cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")])
406 except BaseException as e:
407 print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
410 # DEBUG: print("DEBUG: EXIT!")
def update_last_error(domain: str, response: "requests.models.Response | BaseException | str"):
    """Record the most recent error of ``domain`` and log it.

    Exceptions are turned into a "<type>:<message>" string first. Strings are
    stored with status code 999, response objects with their own
    ``status_code`` and ``reason``. Afterwards the pending instance data is
    flushed and the error is handed to log_error().

    Parameters:
        domain:   Non-empty domain name
        response: Response object, raised exception or plain error message

    Raises:
        ValueError: If ``domain`` is not a string or is empty
    """
    # DEBUG: print("DEBUG: domain,response[]:", domain, type(response))
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    # DEBUG: print("DEBUG: BEFORE response[]:", type(response))
    if isinstance(response, BaseException):
        # Was f"{type}:str(response)", which stored the builtin 'type' and
        # the literal text "str(response)" instead of the actual error.
        response = f"{type(response)}:{str(response)}"

    # DEBUG: print("DEBUG: AFTER response[]:", type(response))
    if isinstance(response, str):
        # DEBUG: print(f"DEBUG: Setting last_error_details='{response}'");
        instances.set("last_status_code"  , domain, 999)
        instances.set("last_error_details", domain, response)
    else:
        # DEBUG: print(f"DEBUG: Setting last_error_details='{response.reason}'");
        instances.set("last_status_code"  , domain, response.status_code)
        instances.set("last_error_details", domain, response.reason)

    # Running pending updates
    # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...")
    instances.update_instance_data(domain)

    log_error(domain, response)

    # DEBUG: print("DEBUG: EXIT!")
def update_last_instance_fetch(domain: str):
    """Set "last_instance_fetch" of ``domain`` to the current time and flush it.

    The value is stored with instances.set() and written out right away
    through instances.update_instance_data().

    Parameters:
        domain: Non-empty domain name

    Raises:
        ValueError: If ``domain`` is not a string or is empty
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain)
    instances.set("last_instance_fetch", domain, time.time())

    # Running pending updates
    # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...")
    instances.update_instance_data(domain)

    # DEBUG: print("DEBUG: EXIT!")
def update_last_nodeinfo(domain: str):
    """Set "last_nodeinfo" and "last_updated" of ``domain`` and flush them.

    Both fields receive the same timestamp. The values are stored with
    instances.set() and written out through instances.update_instance_data().

    Parameters:
        domain: Non-empty domain name

    Raises:
        ValueError: If ``domain`` is not a string or is empty
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    # Read the clock once so both fields describe the very same moment
    now = time.time()

    # DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain)
    instances.set("last_nodeinfo", domain, now)
    instances.set("last_updated" , domain, now)

    # Running pending updates
    # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...")
    instances.update_instance_data(domain)

    # DEBUG: print("DEBUG: EXIT!")
474 def get_peers(domain: str, software: str) -> list:
475 # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software={software} - CALLED!")
476 if type(domain) != str:
477 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
479 raise ValueError(f"Parameter 'domain' cannot be empty")
480 elif type(software) != str and software != None:
481 raise ValueError(f"software[]={type(software)} is not 'str'")
485 if software == "misskey":
486 # DEBUG: print(f"DEBUG: domain='{domain}' is misskey, sending API POST request ...")
488 step = config.get("misskey_limit")
490 # iterating through all "suspended" (follow-only in its terminology)
491 # instances page-by-page, since that troonware doesn't support
492 # sending them all at once
494 # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...")
496 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
504 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
513 # DEBUG: print(f"DEBUG: fetched()={len(fetched)}")
514 if len(fetched) == 0:
515 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
517 elif len(fetched) != config.get("misskey_limit"):
518 # DEBUG: print(f"DEBUG: Fetched '{len(fetched)}' row(s) but expected: '{config.get('misskey_limit')}'")
519 offset = offset + (config.get("misskey_limit") - len(fetched))
521 # DEBUG: print("DEBUG: Raising offset by step:", step)
522 offset = offset + step
525 # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]={type(fetched)}")
526 if isinstance(fetched, dict) and "error" in fetched and "message" in fetched["error"]:
527 print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
528 update_last_error(domain, fetched["error"]["message"])
533 # DEBUG: print(f"DEBUG: row():{len(row)}")
534 if not "host" in row:
535 print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
537 elif type(row["host"]) != str:
538 print(f"WARNING: row[host][]={type(row['host'])} is not 'str'")
540 elif is_blacklisted(row["host"]):
541 # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}'")
543 elif row["host"] in peers:
544 # DEBUG: print(f"DEBUG: Not adding row[host]='{row['host']}', already found.")
545 already = already + 1
548 # DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'")
549 peers.append(row["host"])
551 if already == len(fetched):
552 print(f"WARNING: Host returned same set of '{already}' instances, aborting loop!")
555 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
556 instances.set("total_peers", domain, len(peers))
558 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
559 update_last_instance_fetch(domain)
561 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
563 elif software == "lemmy":
564 # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...")
566 response = get_response(domain, "/api/v3/site", api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
568 data = json_from_response(response)
570 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'")
571 if not response.ok or response.status_code >= 400:
572 print("WARNING: Could not reach any JSON API:", domain)
573 update_last_error(domain, response)
574 elif response.ok and isinstance(data, list):
575 # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
577 elif "federated_instances" in data:
578 # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
579 peers = peers + add_peers(data["federated_instances"])
580 # DEBUG: print("DEBUG: Added instance(s) to peers")
582 print("WARNING: JSON response does not contain 'federated_instances':", domain)
583 update_last_error(domain, response)
585 except BaseException as e:
586 print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
588 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
589 instances.set("total_peers", domain, len(peers))
591 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
592 update_last_instance_fetch(domain)
594 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
596 elif software == "peertube":
597 # DEBUG: print(f"DEBUG: domain='{domain}' is a PeerTube, fetching JSON ...")
600 for mode in ["followers", "following"]:
601 # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'")
604 response = get_response(domain, "/api/v1/server/{mode}?start={start}&count=100", headers, (config.get("connection_timeout"), config.get("read_timeout")))
606 data = json_from_response(response)
607 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'")
608 if response.ok and isinstance(data, dict):
609 # DEBUG: print("DEBUG: Success, data:", len(data))
611 # DEBUG: print(f"DEBUG: Found {len(data['data'])} record(s).")
612 for record in data["data"]:
613 # DEBUG: print(f"DEBUG: record()={len(record)}")
614 if mode in record and "host" in record[mode]:
615 # DEBUG: print(f"DEBUG: Found host={record[mode]['host']}, adding ...")
616 peers.append(record[mode]["host"])
618 print(f"WARNING: record from '{domain}' has no '{mode}' or 'host' record: {record}")
620 if len(data["data"]) < 100:
621 # DEBUG: print("DEBUG: Reached end of JSON response:", domain)
624 # Continue with next row
627 except BaseException as e:
628 print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
630 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
631 instances.set("total_peers", domain, len(peers))
633 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
634 update_last_instance_fetch(domain)
636 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
639 # DEBUG: print(f"DEBUG: Fetching get_peers_url='{get_peers_url}' from '{domain}' ...")
641 response = get_response(domain, get_peers_url, api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
643 data = json_from_response(response)
645 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
646 if not response.ok or response.status_code >= 400:
647 # DEBUG: print(f"DEBUG: Was not able to fetch '{get_peers_url}', trying alternative ...")
648 response = get_response(domain, "/api/v3/site", api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
650 data = json_from_response(response)
651 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
652 if not response.ok or response.status_code >= 400:
653 print("WARNING: Could not reach any JSON API:", domain)
654 update_last_error(domain, response)
655 elif response.ok and isinstance(data, list):
656 # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
658 elif "federated_instances" in data:
659 # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
660 peers = peers + add_peers(data["federated_instances"])
661 # DEBUG: print("DEBUG: Added instance(s) to peers")
663 print("WARNING: JSON response does not contain 'federated_instances':", domain)
664 update_last_error(domain, response)
666 # DEBUG: print("DEBUG: Querying API was successful:", domain, len(data))
669 except BaseException as e:
670 print("WARNING: Some error during get():", domain, e)
671 update_last_error(domain, e)
673 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
674 instances.set("total_peers", domain, len(peers))
676 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
677 update_last_instance_fetch(domain)
679 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = None) -> dict:
    """Send a POST request to ``https://{domain}{path}`` and decode the reply.

    The module-level ``api_headers`` are merged with ``extra_headers``, the
    latter taking precedence. A non-successful response is recorded through
    update_last_error(); failures while sending or decoding are only printed.

    Parameters:
        domain:        Non-empty domain name
        path:          Non-empty request path
        parameter:     Request body, already serialized to a string
        extra_headers: Optional additional HTTP headers

    Returns:
        Whatever json_from_response() decodes, or an empty dict when the
        request raised before a reply could be decoded

    Raises:
        ValueError: If ``domain``, ``path`` or ``parameter`` is invalid
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' cannot be empty")
    elif not isinstance(parameter, str):
        raise ValueError(f"parameter[]={type(parameter)} is not 'str'")

    # A None default avoids sharing one dict object between all calls
    if extra_headers is None:
        extra_headers = {}

    # DEBUG: print("DEBUG: Sending POST to domain,path,parameter:", domain, path, parameter, extra_headers)
    data = {}
    try:
        response = reqto.post(
            f"https://{domain}{path}",
            data=parameter,
            headers={**api_headers, **extra_headers},
            timeout=(config.get("connection_timeout"), config.get("read_timeout"))
        )

        data = json_from_response(response)
        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
        if not response.ok or response.status_code >= 400:
            print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',parameter()={len(parameter)},response.status_code='{response.status_code}',data[]='{type(data)}'")
            update_last_error(domain, response)

    except Exception as e:
        # Best effort: report and return what is there. KeyboardInterrupt
        # and SystemExit are deliberately not caught.
        print(f"WARNING: Some error during post(): domain='{domain}',path='{path}',parameter()={len(parameter)},exception[{type(e)}]:'{str(e)}'")

    # DEBUG: print(f"DEBUG: Returning data({len(data)})=[]:{type(data)}")
    return data
716 def fetch_nodeinfo(domain: str, path: str = None) -> list:
717 # DEBUG: print(f"DEBUG: domain='{domain}',path={path} - CALLED!")
718 if type(domain) != str:
719 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
721 raise ValueError(f"Parameter 'domain' cannot be empty")
722 elif type(path) != str and path != None:
723 raise ValueError(f"Parameter path[]={type(path)} is not 'str'")
725 # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
726 nodeinfo = fetch_wellknown_nodeinfo(domain)
728 # DEBUG: print(f"DEBUG: nodeinfo({len(nodeinfo)})={nodeinfo}")
729 if len(nodeinfo) > 0:
730 # DEBUG: print("DEBUG: nodeinfo()={len(nodeinfo))} - EXIT!")
734 "/nodeinfo/2.1.json",
736 "/nodeinfo/2.0.json",
743 for request in request_paths:
744 if path != None and path != "" and path != f"https://{domain}{path}":
745 # DEBUG: print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
749 # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
750 response = get_response(domain, request, api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
752 data = json_from_response(response)
753 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
754 if response.ok and isinstance(data, dict):
755 # DEBUG: print("DEBUG: Success:", request)
756 instances.set("detection_mode", domain, "STATIC_CHECK")
757 instances.set("nodeinfo_url" , domain, request)
759 elif response.ok and isinstance(data, list):
760 print(f"UNSUPPORTED: domain='{domain}' returned a list: '{data}'")
762 elif not response.ok or response.status_code >= 400:
763 print("WARNING: Failed fetching nodeinfo from domain:", domain)
764 update_last_error(domain, response)
767 except BaseException as e:
768 # DEBUG: print("DEBUG: Cannot fetch API request:", request)
769 update_last_error(domain, e)
772 # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!")
775 def fetch_wellknown_nodeinfo(domain: str) -> list:
776 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
777 if type(domain) != str:
778 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
780 raise ValueError(f"Parameter 'domain' cannot be empty")
782 # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain)
786 response = get_response(domain, "/.well-known/nodeinfo", api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
788 data = json_from_response(response)
789 # DEBUG: print("DEBUG: domain,response.ok,data[]:", domain, response.ok, type(data))
790 if response.ok and isinstance(data, dict):
792 # DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain)
793 if "links" in nodeinfo:
794 # DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"]))
795 for link in nodeinfo["links"]:
796 # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
797 if link["rel"] in nodeinfo_identifier:
798 # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
799 response = get_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
801 data = json_from_response(response)
802 # DEBUG: print("DEBUG: href,response.ok,response.status_code:", link["href"], response.ok, response.status_code)
803 if response.ok and isinstance(data, dict):
804 # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data))
805 instances.set("detection_mode", domain, "AUTO_DISCOVERY")
806 instances.set("nodeinfo_url" , domain, link["href"])
809 print("WARNING: Unknown 'rel' value:", domain, link["rel"])
811 print("WARNING: nodeinfo does not contain 'links':", domain)
813 except BaseException as e:
814 print("WARNING: Failed fetching .well-known info:", domain)
815 update_last_error(domain, e)
818 # DEBUG: print("DEBUG: Returning data[]:", type(data))
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Detect the software of ``domain`` from the HTML page at ``path``.

    The page is fetched with get_response() and searched for
    <meta name="generator"> first and <meta property="og:site_name"> second.
    The content of the tag that was found is cleaned with tidyup_domain(),
    the detection mode is stored ("GENERATOR" or "SITE_NAME") and a pending
    error of the domain is removed. Version numbers and phrases such as
    "powered by", "hosted on", " by " and " see " are stripped afterwards.

    Parameters:
        domain: Non-empty domain name
        path:   Non-empty path of the page to inspect

    Returns:
        The detected software name or None when nothing usable was found

    Raises:
        ValueError: If ``domain`` or ``path`` is not a string or is empty
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        # The message used to blame 'domain' for an empty 'path'
        raise ValueError("Parameter 'path' cannot be empty")

    # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!")
    software = None

    try:
        # DEBUG: print(f"DEBUG: Fetching path='{path}' from '{domain}' ...")
        response = get_response(domain, path, headers, (config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print("DEBUG: domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text))
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print("DEBUG: Search for <meta name='generator'>:", domain)
            doc = bs4.BeautifulSoup(response.text, "html.parser")

            # DEBUG: print("DEBUG: doc[]:", type(doc))
            generator = doc.find("meta", {"name": "generator"})
            site_name = doc.find("meta", {"property": "og:site_name"})

            # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
            if isinstance(generator, bs4.element.Tag):
                # DEBUG: print("DEBUG: Found generator meta tag:", domain)
                software = tidyup_domain(generator.get("content"))
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set("detection_mode", domain, "GENERATOR")
                remove_pending_error(domain)
            elif isinstance(site_name, bs4.element.Tag):
                # DEBUG: print("DEBUG: Found property=og:site_name:", domain)
                # Was assigned to the misspelled name 'sofware', so the site
                # name never reached the return value.
                software = tidyup_domain(site_name.get("content"))
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set("detection_mode", domain, "SITE_NAME")
                remove_pending_error(domain)

    except Exception as e:
        # KeyboardInterrupt and SystemExit are deliberately not caught
        # DEBUG: print(f"DEBUG: Cannot fetch / from '{domain}':", e)
        update_last_error(domain, e)

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and software == "":
        # DEBUG: print(f"DEBUG: Corrected empty string to None for software of domain='{domain}'")
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = remove_version(software)

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and " powered by " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = remove_version(strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'hosted on' in it")
        software = remove_version(strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
        software = strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
        software = strip_until(software, " see ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine the name of the software running on an instance.

    Nodeinfo is requested through fetch_nodeinfo(). Whenever that does not
    yield a usable software name (no data, an error/message response, no
    [software][name] entry, or a name that is empty after clean-up), the
    result of fetch_generator_from_path() is used instead.

    Parameters:
        domain: Domain name of the instance, must be a non-empty string.
        path:   Optional nodeinfo path handed to fetch_nodeinfo().

    Returns:
        The normalized software name, or whatever
        fetch_generator_from_path() returned as fallback.

    Raises:
        ValueError: If 'domain' is not a non-empty string or 'path' is
                    neither None nor a string.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif path is not None and not isinstance(path, str):
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
    data = fetch_nodeinfo(domain, path)

    # DEBUG: print("DEBUG: data[]:", type(data))
    if not isinstance(data, dict) or len(data) == 0:
        # DEBUG: print("DEBUG: Could not determine software type:", domain)
        return fetch_generator_from_path(domain)

    # DEBUG: print("DEBUG: data():", len(data), data)
    if "message" in data:
        # A "message" entry means the response is not nodeinfo; remember
        # the message as last error and try the generator instead.
        if data.get("status") == "error":
            print("WARNING: JSON response is an error:", data["message"])
        else:
            print("WARNING: JSON response contains only a message:", data["message"])

        update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)

    # The response comes from a remote server, so do not trust its shape:
    # "software" must be a dict and its "name" a string.
    software_info = data.get("software")
    name = software_info.get("name") if isinstance(software_info, dict) else None
    if not isinstance(name, str):
        # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
        return fetch_generator_from_path(domain)

    software = tidyup_domain(name)

    # Forks are reported under the name of the software they derive from
    aliases = {
        "akkoma"     : "pleroma",
        "rebased"    : "pleroma",
        "hometown"   : "mastodon",
        "ecko"       : "mastodon",
        "calckey"    : "misskey",
        "groundpolis": "misskey",
        "foundkey"   : "misskey",
        "cherrypick" : "misskey",
        "meisskey"   : "misskey",
    }

    # DEBUG: print("DEBUG: software after tidyup_domain():", software)
    if software in aliases:
        # DEBUG: print(f"DEBUG: Setting {aliases[software]}:", domain, software)
        software = aliases[software]
    elif software.find("/") > 0:
        # Keep only the part behind the last slash
        print("WARNING: Spliting of slash:", software)
        software = software.split("/")[-1]
    elif software.find("|") > 0:
        # Keep only the part in front of the first pipe
        print("WARNING: Spliting of pipe:", software)
        software = tidyup_domain(software.split("|")[0])
    elif "powered by" in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = strip_powered_by(software)
    elif " by " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
        software = strip_until(software, " by ")
    elif " see " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
        software = strip_until(software, " see ")

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if not software:
        # Nothing left after clean-up: fall back to the generator instead
        # of running string checks on a value that is not a usable name.
        print("WARNING: tidyup_domain() left no software name behind:", domain)
        # DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...")
        software = fetch_generator_from_path(domain)
    elif isinstance(software, str) and ("." in software or " " in software):
        # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = remove_version(software)

    # DEBUG: print(f"DEBUG: software[]={type(software)}")
    if isinstance(software, str) and "powered by" in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = remove_version(strip_powered_by(software))

    # DEBUG: print("DEBUG: Returning domain,software:", domain, software)
    return software
# Stores a block reason for an already recorded block in table 'blocks' and
# refreshes its 'last_seen' column in the same statement.
# Raises ValueError when 'reason' is neither None nor a string, or when
# 'blocker', 'blocked' or 'block_level' is not a string.
977 def update_block_reason(reason: str, blocker: str, blocked: str, block_level: str):
978 # DEBUG: print(f"DEBUG: reason='{reason}',blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!")
# Only 'reason' is allowed to be None, all other parameters must be strings
979 if type(reason) != str and reason != None:
980 raise ValueError(f"Parameter reason[]='{type(reason)}' is not 'str'")
981 elif type(blocker) != str:
982 raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'")
983 elif type(blocked) != str:
984 raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not 'str'")
985 elif type(block_level) != str:
986 raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not 'str'")
988 # DEBUG: print("DEBUG: Updating block reason:", reason, blocker, blocked, block_level)
# The WHERE clause restricts the update to rows whose reason is still '' or
# 'unknown', so a reason that was already recorded is not overwritten.
# NOTE(review): "UPDATE ... LIMIT" presumably depends on how the SQLite
# library was built - confirm for the target environment.
991 "UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason IN ('','unknown') LIMIT 1",
1001 # DEBUG: print(f"DEBUG: cursor.rowcount={cursor.rowcount}")
# A row count of zero means no row matched the WHERE clause
1002 if cursor.rowcount == 0:
1003 # DEBUG: print(f"DEBUG: Did not update any rows: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',reason='{reason}' - EXIT!")
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit;
# consider narrowing this to the database error that is expected here.
1006 except BaseException as e:
1007 print(f"ERROR: failed SQL query: reason='{reason}',blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(e)}]:'{str(e)}'")
1010 # DEBUG: print("DEBUG: EXIT!")
# Refreshes column 'last_seen' of the row in table 'blocks' that matches the
# given blocker, blocked and block_level.
# NOTE(review): unlike the neighbouring functions no parameter validation is
# visible here - confirm whether that is intended.
1012 def update_last_seen(blocker: str, blocked: str, block_level: str):
1013 # DEBUG: print("DEBUG: Updating last_seen for:", blocker, blocked, block_level)
# NOTE(review): "UPDATE ... LIMIT" presumably depends on how the SQLite
# library was built - confirm for the target environment.
1016 "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
1025 # DEBUG: print(f"DEBUG: cursor.rowcount={cursor.rowcount}")
# A row count of zero means no row matched the WHERE clause
1026 if cursor.rowcount == 0:
1027 # DEBUG: print(f"DEBUG: Did not update any rows: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}' - EXIT!")
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit;
# consider narrowing this to the database error that is expected here.
1030 except BaseException as e:
1031 print(f"ERROR: failed SQL query: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(e)}]:'{str(e)}'")
1034 # DEBUG: print("DEBUG: EXIT!")
# Checks whether table 'blocks' contains a row for the given combination of
# blocker, blocked and block_level.
# Raises ValueError when a parameter is not a string or is empty.
1036 def is_instance_blocked(blocker: str, blocked: str, block_level: str) -> bool:
1037 # DEBUG: print(f"DEBUG: blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!")
# All three parameters must be non-empty strings
1038 if type(blocker) != str:
1039 raise ValueError(f"Parameter blocker[]={type(blocker)} is not of type 'str'")
1041 raise ValueError("Parameter 'blocker' cannot be empty")
1042 elif type(blocked) != str:
1043 raise ValueError(f"Parameter blocked[]={type(blocked)} is not of type 'str'")
1045 raise ValueError("Parameter 'blocked' cannot be empty")
1046 elif type(block_level) != str:
1047 raise ValueError(f"Parameter block_level[]={type(block_level)} is not of type 'str'")
1048 elif block_level == "":
1049 raise ValueError("Parameter 'block_level' cannot be empty")
# One matching row is enough, hence LIMIT 1
1052 "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
# fetchone() yields None when the query matched no row
1060 is_blocked = cursor.fetchone() != None
1062 # DEBUG: print(f"DEBUG: is_blocked='{is_blocked}' - EXIT!")
# Records a new block (blocker -> blocked) with reason and block level in
# table 'blocks'; both 'first_seen' and 'last_seen' are part of the INSERT.
# Raises ValueError when 'blocker' or 'blocked' is not a non-empty string or
# not a valid domain, and Exception when one of them is blacklisted.
1065 def block_instance(blocker: str, blocked: str, reason: str, block_level: str):
1066 # DEBUG: print("DEBUG: blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level)
1067 if type(blocker) != str:
1068 raise ValueError(f"Parameter blocker[]={type(blocker)} is not 'str'")
1070 raise ValueError(f"Parameter 'blocker' cannot be empty")
# Only the part in front of the first slash is validated as a domain name
1071 elif not validators.domain(blocker.split("/")[0]):
1072 raise ValueError(f"Bad blocker='{blocker}'")
1073 elif type(blocked) != str:
1074 raise ValueError(f"Parameter blocked[]={type(blocked)} is not 'str'")
1076 raise ValueError(f"Parameter 'blocked' cannot be empty")
1077 elif not validators.domain(blocked.split("/")[0]):
1078 raise ValueError(f"Bad blocked='{blocked}'")
# Callers are expected to filter blacklisted domains before calling this
1079 elif is_blacklisted(blocker):
1080 raise Exception(f"blocker='{blocker}' is blacklisted but function invoked")
1081 elif is_blacklisted(blocked):
1082 raise Exception(f"blocked='{blocked}' is blacklisted but function invoked")
1085 # Maybe needs cleaning
1086 reason = tidyup_reason(reason)
1088 print(f"INFO: New block: blocker='{blocker}',blocked='{blocked}', reason='{reason}', block_level='{block_level}'")
1091 "INSERT INTO blocks (blocker, blocked, reason, block_level, first_seen, last_seen) VALUES(?, ?, ?, ?, ?, ?)",
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit;
# consider narrowing this to the database error that is expected here.
1101 except BaseException as e:
1102 print(f"ERROR: failed SQL query: blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}',exception[{type(e)}]:'{str(e)}'")
1105 # DEBUG: print("DEBUG: EXIT!")
# Checks whether a domain is registered, using the cache entry
# "is_registered" which is filled from table 'instances' on first use.
# Raises ValueError when 'domain' is not a non-empty string.
1107 def is_instance_registered(domain: str) -> bool:
1108 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
1109 if type(domain) != str:
1110 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1112 raise ValueError(f"Parameter 'domain' cannot be empty")
1114 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
# The database is only queried while the cache key does not exist yet
1115 if not cache.key_exists("is_registered"):
1116 # DEBUG: print(f"DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...")
1118 cursor.execute("SELECT domain FROM instances")
# All fetched rows are handed to the cache in one call
# NOTE(review): presumably each domain becomes a sub key with value True -
# verify against cache.set_all()
1121 cache.set_all("is_registered", cursor.fetchall(), True)
1122 except BaseException as e:
1123 print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
# The actual lookup is answered by the cache
1127 registered = cache.sub_key_exists("is_registered", domain)
1129 # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!")
# Registers a new instance: determines its software, inserts a row into
# table 'instances', marks the domain as registered in the cache and then
# flushes pending instance data and pending errors for that domain.
# Raises ValueError on invalid 'domain', 'origin' or 'originator' and
# Exception when 'domain' is blacklisted.
1132 def add_instance(domain: str, origin: str, originator: str, path: str = None):
1133 # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',originator='{originator}',path='{path}' - CALLED!")
1134 if type(domain) != str:
1135 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1137 raise ValueError(f"Parameter 'domain' cannot be empty")
# 'origin' is optional (None is accepted), 'originator' is mandatory
1138 elif type(origin) != str and origin != None:
1139 raise ValueError(f"origin[]={type(origin)} is not 'str'")
1140 elif type(originator) != str:
1141 raise ValueError(f"originator[]={type(originator)} is not 'str'")
1142 elif originator == "":
1143 raise ValueError(f"originator cannot be empty")
# Only the part in front of the first slash is validated as a domain name
1144 elif not validators.domain(domain.split("/")[0]):
1145 raise ValueError(f"Bad domain name='{domain}'")
1146 elif origin is not None and not validators.domain(origin.split("/")[0]):
1147 raise ValueError(f"Bad origin name='{origin}'")
# Callers are expected to filter blacklisted domains before calling this
1148 elif is_blacklisted(domain):
1149 raise Exception(f"domain='{domain}' is blacklisted, but method invoked")
1151 # DEBUG: print("DEBUG: domain,origin,originator,path:", domain, origin, originator, path)
# The software name is stored together with the new row
1152 software = determine_software(domain, path)
1153 # DEBUG: print("DEBUG: Determined software:", software)
1155 print(f"INFO: Adding instance domain='{domain}' (origin='{origin}',software='{software}')")
1158 "INSERT INTO instances (domain, origin, originator, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
# Keep the "is_registered" cache in sync with the new row
1169 cache.set_sub_key("is_registered", domain, True)
# Pending instance data is written with status code and error details reset
1171 if instances.has_pending_instance_data(domain):
1172 # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...")
1173 instances.set("last_status_code" , domain, None)
1174 instances.set("last_error_details", domain, None)
1175 instances.update_instance_data(domain)
1176 remove_pending_error(domain)
# An error that was queued for this domain is recorded now and then dropped
# from the pending list
1178 if domain in pending_errors:
1179 # DEBUG: print("DEBUG: domain has pending error being updated:", domain)
1180 update_last_error(domain, pending_errors[domain])
1181 remove_pending_error(domain)
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit;
# consider narrowing this to the database error that is expected here.
1183 except BaseException as e:
1184 print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
1187 # DEBUG: print("DEBUG: Updating nodeinfo for domain:", domain)
1188 update_last_nodeinfo(domain)
1190 # DEBUG: print("DEBUG: EXIT!")
# Builds a plain-text message listing the instances that 'instance' has
# blocked and posts it to the configured bot account
# ("<bot_instance>/api/v1/statuses").
# NOTE(review): 'blocks' is annotated as dict but it is sliced and iterated
# like a list of dicts with keys "blocked" and "reason" - confirm the type.
1192 def send_bot_post(instance: str, blocks: dict):
1193 # DEBUG: print(f"DEBUG: instance={instance},blocks()={len(blocks)} - CALLED!")
1194 message = instance + " has blocked the following instances:\n\n"
# Long lists are shortened before the message is built
1197 if len(blocks) > 20:
# NOTE(review): the slice [0 : 19] keeps 19 entries while the notice below
# speaks of the first 20 - confirm which one is intended.
1199 blocks = blocks[0 : 19]
1201 for block in blocks:
# Blocks without a reason get a fixed wording
1202 if block["reason"] == None or block["reason"] == '':
1203 message = message + block["blocked"] + " with unspecified reason\n"
# Reasons longer than 420 characters are cut and marked with "[…]"
1205 if len(block["reason"]) > 420:
1206 block["reason"] = block["reason"][0:419] + "[…]"
# A zero-width space (U+200B) is inserted after every "@" in the reason,
# presumably so that the text is not treated as a mention - TODO confirm
1208 message = message + block["blocked"] + ' for "' + block["reason"].replace("@", "@\u200b") + '"\n'
1211 message = message + "(the list has been truncated to the first 20 entries)"
# API headers extended by the bearer token of the bot account
1213 botheaders = {**api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}}
1216 f"{config.get('bot_instance')}/api/v1/statuses",
1219 "visibility" : config.get('bot_visibility'),
1220 "content_type": "text/plain"
# Scrapes the "/about" page of a Mastodon instance and collects the block
# lists found below its <h3> headers. The result maps the collected lists
# to the keys "reject", "media_removal" and "followers_only".
# Raises ValueError when 'domain' is not a non-empty string.
1228 def get_mastodon_blocks(domain: str) -> dict:
1229 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
1230 if type(domain) != str:
1231 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1233 raise ValueError(f"Parameter 'domain' cannot be empty")
1235 # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
# One list per section header of the about page
1237 "Suspended servers": [],
1238 "Filtered media" : [],
1239 "Limited servers" : [],
1240 "Silenced servers" : [],
# Both timeouts come from the configuration and are passed as one tuple
1244 doc = bs4.BeautifulSoup(
1245 get_response(domain, "/about", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
# A failed fetch is recorded as last error of the domain
1248 except BaseException as e:
1249 print("ERROR: Cannot fetch from domain:", domain, e)
1250 update_last_error(domain, e)
1253 for header in doc.find_all("h3"):
1254 header_text = tidyup_domain(header.text)
# Map alternative header wordings to the keys used in 'blocks'
1256 if header_text in language_mapping:
1257 # DEBUG: print(f"DEBUG: header_text='{header_text}'")
1258 header_text = language_mapping[header_text]
# NOTE(review): tidyup_domain() lower-cases its input while the keys of
# 'blocks' above are capitalised - verify that this test can match at all.
1260 if header_text in blocks or header_text.lower() in blocks:
1261 # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
# The first table after the header is used, its first row is skipped
1262 for line in header.find_all_next("table")[0].find_all("tr")[1:]:
1263 blocks[header_text].append(
1265 "domain": tidyup_domain(line.find("span").text),
# The first 9 characters of the title attribute are cut off
# NOTE(review): presumably a fixed prefix in front of the hash - confirm
1266 "hash" : tidyup_domain(line.find("span")["title"][9:]),
1267 "reason": tidyup_domain(line.find_all("td")[1].text),
1271 # DEBUG: print("DEBUG: Returning blocks for domain:", domain)
# Limited and silenced servers are merged into "followers_only"
1273 "reject" : blocks["Suspended servers"],
1274 "media_removal" : blocks["Filtered media"],
1275 "followers_only": blocks["Limited servers"] + blocks["Silenced servers"],
# Scrapes the "/friendica" page of a Friendica instance and collects domain
# and reason of every row in the table below element id "about_blocklist".
# Raises ValueError when 'domain' is not a non-empty string.
1278 def get_friendica_blocks(domain: str) -> dict:
1279 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
1280 if type(domain) != str:
1281 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1283 raise ValueError(f"Parameter 'domain' cannot be empty")
1285 # DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain)
# Both timeouts come from the configuration and are passed as one tuple
1289 doc = bs4.BeautifulSoup(
1290 get_response(domain, "/friendica", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
# A failed fetch is recorded as last error of the domain
1293 except BaseException as e:
1294 print("WARNING: Failed to fetch /friendica from domain:", domain, e)
1295 update_last_error(domain, e)
1298 blocklist = doc.find(id="about_blocklist")
1300 # Prevents exceptions:
1301 if blocklist is None:
1302 # DEBUG: print("DEBUG: Instance has no block list:", domain)
# The first table row is skipped; columns 0 and 1 hold domain and reason
1305 for line in blocklist.find("table").find_all("tr")[1:]:
1306 # DEBUG: print(f"DEBUG: line='{line}'")
1308 "domain": tidyup_domain(line.find_all("td")[0].text),
1309 "reason": tidyup_domain(line.find_all("td")[1].text)
1312 # DEBUG: print("DEBUG: Returning blocks() for domain:", domain, len(blocks))
# Fetches suspended and blocked instances from a Misskey instance through
# POST requests to "/api/federation/instances", page by page. The result
# maps blocked instances to "reject" and suspended ones to "followers_only".
# Raises ValueError when 'domain' is not a non-empty string.
1317 def get_misskey_blocks(domain: str) -> dict:
1318 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
1319 if type(domain) != str:
1320 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1322 raise ValueError(f"Parameter 'domain' cannot be empty")
1324 # DEBUG: print("DEBUG: Fetching misskey blocks from domain:", domain)
# Page size for the API requests, taken from the configuration
1331 step = config.get("misskey_limit")
1333 # iterating through all "suspended" (follow-only in its terminology)
1334 # instances page-by-page, since the API doesn't support
1335 # sending them all at once
1337 # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...")
1339 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
1340 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
1349 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
# This request additionally carries an "offset" entry
1350 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
1355 "offset" : offset - 1
1360 # DEBUG: print("DEBUG: fetched():", len(fetched))
1361 if len(fetched) == 0:
1362 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
# NOTE(review): on a short page the offset is raised by the number of
# missing rows (limit - fetched), not by the number of fetched rows -
# confirm that this is intended.
1364 elif len(fetched) != config.get("misskey_limit"):
1365 # DEBUG: print(f"DEBUG: Fetched '{len(fetched)}' row(s) but expected: '{config.get('misskey_limit')}'")
1366 offset = offset + (config.get("misskey_limit") - len(fetched))
1368 # DEBUG: print("DEBUG: Raising offset by step:", step)
1369 offset = offset + step
1372 for instance in fetched:
# NOTE(review): has_key() receives the whole 'instance' entry as value while
# the stored "domain" values are cleaned host names, so this duplicate check
# presumably never matches - verify.
1374 if instance["isSuspended"] and not has_key(blocks["suspended"], "domain", instance):
1376 blocks["suspended"].append(
1378 "domain": tidyup_domain(instance["host"]),
1379 # no reason field, nothing
1384 # DEBUG: print(f"DEBUG: count={count}")
1386 # DEBUG: print(f"DEBUG: API is no more returning new instances, aborting loop!")
# A failed request is recorded as last error of the domain
1389 except BaseException as e:
1390 print("WARNING: Caught error, exiting loop:", domain, e)
1391 update_last_error(domain, e)
1396 # same procedure again for "blocked" instances (aka full suspend)
1399 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
1400 fetched = post_json_api(domain,"/api/federation/instances", json.dumps({
1409 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
# This request additionally carries an "offset" entry
1410 fetched = post_json_api(domain,"/api/federation/instances", json.dumps({
1415 "offset" : offset - 1
1420 # DEBUG: print("DEBUG: fetched():", len(fetched))
1421 if len(fetched) == 0:
1422 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
# NOTE(review): same offset arithmetic as in the first loop - confirm
1424 elif len(fetched) != config.get("misskey_limit"):
1425 # DEBUG: print(f"DEBUG: Fetched '{len(fetched)}' row(s) but expected: '{config.get('misskey_limit')}'")
1426 offset = offset + (config.get("misskey_limit") - len(fetched))
1428 # DEBUG: print("DEBUG: Raising offset by step:", step)
1429 offset = offset + step
1432 for instance in fetched:
# NOTE(review): same has_key() call shape as in the first loop - verify
1434 if instance["isBlocked"] and not has_key(blocks["blocked"], "domain", instance):
1436 blocks["blocked"].append({
1437 "domain": tidyup_domain(instance["host"]),
1441 # DEBUG: print(f"DEBUG: count={count}")
1443 # DEBUG: print(f"DEBUG: API is no more returning new instances, aborting loop!")
# A failed request is recorded as last error of the domain
1446 except BaseException as e:
1447 print("ERROR: Exception during POST:", domain, e)
1448 update_last_error(domain, e)
1452 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
1453 update_last_instance_fetch(domain)
1455 # DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"]))
1457 "reject" : blocks["blocked"],
1458 "followers_only": blocks["suspended"]
def tidyup_reason(reason: str) -> str:
    """Clean up a block reason.

    Surrounding whitespace is removed and every "â" (left-over of a
    mis-encoded quotation mark) is replaced by a plain double quote.

    Parameters:
        reason: The raw reason text; str subclasses are accepted, too.

    Returns:
        The cleaned reason.

    Raises:
        ValueError: If 'reason' is not a string.
    """
    # DEBUG: print(f"DEBUG: reason='{reason}' - CALLED!")
    if not isinstance(reason, str):
        raise ValueError(f"Parameter reason[]={type(reason)} is not expected")

    # Strip string
    reason = reason.strip()

    # Replace â with " (a literal replacement, no regular expression needed)
    reason = reason.replace("â", "\"")

    # DEBUG: print(f"DEBUG: reason='{reason}' - EXIT!")
    return reason
def tidyup_domain(domain: str) -> str:
    """Normalize a domain name taken from a block list or a web page.

    The value is lower-cased and trimmed; a trailing dot, a trailing slash,
    a port number, a leading "http(s):" protocol, a leading "@" and a
    "user@" prefix are removed.

    Parameters:
        domain: The raw value; str subclasses are accepted, too.

    Returns:
        The normalized value.

    Raises:
        ValueError: If 'domain' is not a string.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not expected")

    # All lower-case and strip spaces out + last dot
    domain = domain.lower().strip().rstrip(".")

    # No trailing slash; done before the port is removed, otherwise a port
    # followed by a slash ("host:8443/") would not be found at the end
    domain = re.sub(r"/$", "", domain)

    # No port number
    domain = re.sub(r":\d+$", "", domain)

    # No protocol, sometimes without the slashes
    domain = re.sub(r"^https?:(/*)", "", domain)

    # No @ at the start
    domain = re.sub(r"^@", "", domain)

    # No individual users in block lists
    domain = re.sub(r"(.+)@", "", domain)

    # DEBUG: print(f"DEBUG: domain='{domain}' - EXIT!")
    return domain
def json_from_response(response: requests.models.Response) -> list:
    """Decode the JSON body of a response.

    Parameters:
        response: The response object to decode.

    Returns:
        The decoded JSON data, or an empty list when the body is empty or
        cannot be decoded.

    Raises:
        ValueError: If 'response' is not a requests.models.Response.
    """
    # DEBUG: print(f"DEBUG: response[]={type(response)} - CALLED!")
    if not isinstance(response, requests.models.Response):
        raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")

    data = []
    if response.text.strip() != "":
        # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...")
        try:
            data = response.json()
        except ValueError:
            # The decode errors of json, simplejson and requests all derive
            # from ValueError; catching only json.decoder.JSONDecodeError
            # would miss the one raised when requests uses simplejson.
            data = []

    # DEBUG: print(f"DEBUG: data[]={type(data)} - EXIT!")
    return data
# Sends a GET request for "https://<domain><path>" through reqto.get().
# Raises ValueError when 'domain' or 'path' is not a non-empty string.
# NOTE(review): 'timeout' is annotated as list while callers in this file
# pass a tuple (connection timeout, read timeout).
1517 def get_response(domain: str, path: str, headers: dict, timeout: list) -> requests.models.Response:
1518 # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',headers()={len(headers)},timeout={timeout} - CALLED!")
1519 if type(domain) != str:
1520 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
1522 raise ValueError("Parameter 'domain' cannot be empty")
1523 elif type(path) != str:
1524 raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
1526 raise ValueError("Parameter 'path' cannot be empty")
1529 # DEBUG: print(f"DEBUG: Sending request to '{domain}{path}' ...")
# The URL is always built with the https scheme
1530 response = reqto.get(
1531 f"https://{domain}{path}",
# Connection errors are recorded as last error of the domain
1535 except requests.exceptions.ConnectionError as e:
1536 # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(e)}]='{str(e)}'")
1537 update_last_error(domain, e)
1540 # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!")
def has_key(keys: list, search: str, value: object) -> bool:
    """Check whether any dict in a list holds a given value under a key.

    The list is scanned in order and the scan stops at the first match, so
    entries behind a match are not inspected.

    Parameters:
        keys:   List of dicts to scan.
        search: Key to look up in each dict, must not be empty.
        value:  Value to compare against.

    Returns:
        True when one dict has keys[i][search] == value, False otherwise.

    Raises:
        ValueError: If 'keys' is not a list, 'search' is not a non-empty
                    string or an inspected entry is not a dict.
        KeyError:   If an inspected dict has no key 'search'.
    """
    # DEBUG: print(f"DEBUG: keys()={len(keys)},search='{search}',value[]='{type(value)}' - CALLED!")
    if not isinstance(keys, list):
        raise ValueError(f"Parameter keys[]='{type(keys)}' is not 'list'")
    elif not isinstance(search, str):
        raise ValueError(f"Parameter search[]='{type(search)}' is not 'str'")
    elif search == "":
        raise ValueError("Parameter 'search' cannot be empty")

    # DEBUG: print(f"DEBUG: Checking keys()={len(keys)} ...")
    for key in keys:
        # DEBUG: print(f"DEBUG: key['{type(key)}']={key}")
        if not isinstance(key, dict):
            raise ValueError(f"key[]='{type(key)}' is not 'dict'")
        elif search not in key:
            raise KeyError(f"Cannot find search='{search}'")
        elif key[search] == value:
            # First match decides, no need to look any further
            return True

    return False
# Extracts blocked domains and their reasons from the rows of an HTML table.
# Rows without a <td>, blacklisted domains and invalid domain names are
# skipped; one known row that lists several domains is split up.
# Raises ValueError when 'tag' is not a bs4.element.Tag and KeyError when
# it contains no table rows.
1567 def find_domains(tag: bs4.element.Tag) -> list:
1568 # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!")
1569 if not isinstance(tag, bs4.element.Tag):
1570 raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")
# NOTE(review): this condition repeats the one above, so this branch looks
# unreachable - confirm what was meant to be checked here.
1571 elif not isinstance(tag, bs4.element.Tag):
1572 raise KeyError("Cannot find table with instances!")
1573 elif len(tag.select("tr")) == 0:
1574 raise KeyError("No table rows found in table!")
1577 for element in tag.select("tr"):
1578 # DEBUG: print(f"DEBUG: element[]={type(element)}")
1579 if not element.find("td"):
1580 # DEBUG: print("DEBUG: Skipping element, no <td> found")
# The first cell holds the domain, the second one the reason
1583 domain = tidyup_domain(element.find("td").text)
1584 reason = tidyup_reason(element.findAll("td")[1].text)
1586 # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
1588 if is_blacklisted(domain):
1589 print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
# Special case: a single row that names more than one domain
1591 elif domain == "gab.com/.ai, develop.gab.com":
1592 # DEBUG: print(f"DEBUG: Multiple domains detected in one row")
1594 "domain": "gab.com",
1602 "domain": "develop.gab.com",
1606 elif not validators.domain(domain):
1607 print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
1610 # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
1616 # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
def get_url(url: str, headers: dict, timeout: list) -> requests.models.Response:
    """Fetch a full URL through get_response().

    The URL is split with urlparse(); host name, path and query string are
    handed to get_response(), which builds the request itself. Scheme and
    port of the URL are therefore not passed on.

    Parameters:
        url:     The URL to fetch, must be a non-empty string.
        headers: HTTP headers handed to get_response().
        timeout: Timeout value(s) handed to get_response().

    Returns:
        The response returned by get_response().

    Raises:
        ValueError: If 'url' is not a non-empty string, or raised by
                    get_response() for an unusable host name.
    """
    # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!")
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' cannot be empty")

    # DEBUG: print(f"DEBUG: Parsing url='{url}'")
    components = urlparse(url)

    # get_response() rejects an empty path, which is what urlparse() yields
    # for URLs like "https://example.com", so request the root path then
    path = components.path if components.path != "" else "/"

    # Invoke other function, avoid trailing ?
    # DEBUG: print(f"DEBUG: components[{type(components)}]={components}")
    if components.query != "":
        path = f"{path}?{components.query}"

    response = get_response(components.hostname, path, headers, timeout)

    # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXIT!")
    return response