1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
28 from urllib.parse import urlparse
31 from fba import config
32 from fba import instances
34 # Don't check these, known trolls/flooders/testing/developing
36 # Floods network with fake nodes as "research" project
37 "activitypub-troll.cf",
43 "social.shrimpcam.pw",
45 "mastotroll.netz.org",
46 # Testing/developing installations
49 "misskeytest.chn.moe",
52 # Array with pending errors needed to be written to database
56 # "rel" identifiers (no real URLs)
57 nodeinfo_identifier = [
58 "https://nodeinfo.diaspora.software/ns/schema/2.1",
59 "https://nodeinfo.diaspora.software/ns/schema/2.0",
60 "https://nodeinfo.diaspora.software/ns/schema/1.1",
61 "https://nodeinfo.diaspora.software/ns/schema/1.0",
62 "http://nodeinfo.diaspora.software/ns/schema/2.1",
63 "http://nodeinfo.diaspora.software/ns/schema/2.0",
64 "http://nodeinfo.diaspora.software/ns/schema/1.1",
65 "http://nodeinfo.diaspora.software/ns/schema/1.0",
68 # HTTP headers for non-API requests
70 "User-Agent": config.get("useragent"),
73 # HTTP headers for API requests
75 "User-Agent": config.get("useragent"),
76 "Content-Type": "application/json",
81 "Silenced instances" : "Silenced servers",
82 "Suspended instances" : "Suspended servers",
83 "Limited instances" : "Limited servers",
84 "Filtered media" : "Filtered media",
85 # Mapping German -> English
86 "Gesperrte Server" : "Suspended servers",
87 "Gefilterte Medien" : "Filtered media",
88 "Stummgeschaltete Server" : "Silenced servers",
90 "停止済みのサーバー" : "Suspended servers",
91 "制限中のサーバー" : "Limited servers",
92 "メディアを拒否しているサーバー": "Filtered media",
93 "サイレンス済みのサーバー" : "Silenced servers",
95 "שרתים מושעים" : "Suspended servers",
96 "מדיה מסוננת" : "Filtered media",
97 "שרתים מוגבלים" : "Silenced servers",
99 "Serveurs suspendus" : "Suspended servers",
100 "Médias filtrés" : "Filtered media",
101 "Serveurs limités" : "Limited servers",
102 "Serveurs modérés" : "Limited servers",
105 # URL for fetching peers
106 get_peers_url = "/api/v1/instance/peers"
108 # Connect to database
109 connection = sqlite3.connect("blocks.db")
110 cursor = connection.cursor()
112 # Pattern instance for version numbers
114 # semantic version number (with optional v|V prefix)
115 re.compile("^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"),
116 # non-semantic, e.g. 1.2.3.4
117 re.compile("^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(\.(?P<subpatch>0|[1-9]\d*))?)$"),
118 # non-semantic, e.g. 2023.05[-dev]
119 re.compile("^(?P<year>[1-9]{1}[0-9]{3})\.(?P<month>[0-9]{2})(-dev){0,1}$"),
120 # non-semantic, e.g. abcdef0
121 re.compile("^[a-f0-9]{7}$"),
124 ##### Other functions #####
def is_primitive(var: object) -> bool:
    """Tell whether `var` is None or exactly an int, str, float or bool.

    The type comparison is exact, so instances of subclasses of these types
    are not reported as primitive.
    """
    # Identity test instead of `== None`: an object with a custom __eq__ must
    # not be able to pass itself off as None.
    return var is None or type(var) in (int, str, float, bool)
def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
    """Register `domain` if needed, fetch its peers and register each new, valid peer.

    Args:
        domain: Instance whose peer list is fetched via get_peers().
        origin: Domain that pointed to `domain`, or None.
        software: Software name handed to get_peers().
        script: Name of the invoking script, handed to add_instance().
        path: Optional path handed to add_instance() for `domain`.

    Raises:
        ValueError: If `domain` or `script` is not a non-empty str, or if
            `origin` is neither a str nor None.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif not isinstance(script, str):
        raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
    elif script == "":
        # The message used to name 'domain' although 'script' is checked here
        raise ValueError("Parameter 'script' cannot be empty")

    if not is_instance_registered(domain):
        add_instance(domain, origin, script, path)

    peerlist = get_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending_instance_data(domain):
        # Pending data was collected while fetching peers, write it out now
        instances.update_instance_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
    for instance in peerlist:
        if instance is None:
            # Skip "None" entries as tidyup_domain() cannot parse them
            continue

        instance = tidyup_domain(instance)

        if instance == "":
            print("WARNING: Empty instance after tidyup_domain(), domain:", domain)
            continue
        elif not validators.domain(instance.split("/")[0]):
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
            continue
        elif is_blacklisted(instance):
            continue

        try:
            if not is_instance_registered(instance):
                add_instance(instance, domain, script)
        except BaseException as e:
            # NOTE(review): BaseException is kept from the original; it also
            # swallows KeyboardInterrupt/SystemExit - confirm this is wanted.
            print(f"ERROR: instance='{instance}',exception[{type(e)}]:'{str(e)}'")
            continue
def add_peers(rows: dict) -> list:
    """Collect the peers found under "linked", "allowed" and "blocked" in `rows`.

    Each peer is passed through tidyup_domain(); peers that come back empty or
    that is_blacklisted() reports as blacklisted are left out.

    Args:
        rows: Mapping that may contain any of the three keys; a missing key or
            a None value is ignored.

    Returns:
        List of tidied peer names, in the order they were encountered.
    """
    peers = []

    for key in ("linked", "allowed", "blocked"):
        entries = rows.get(key)
        if entries is None:
            continue

        for peer in entries:
            peer = tidyup_domain(peer)

            if not peer:
                # is_blacklisted() raises on an empty domain; one bad entry
                # must not discard the whole list.
                continue
            elif is_blacklisted(peer):
                continue

            peers.append(peer)

    return peers
def remove_version(software: str) -> str:
    """Strip a trailing version number from a software string.

    Anything after the first ";", "," or " - " is dropped first. The last
    token (split on " ", "/" or "-") is then tested against the module-level
    `patterns`; only when one of them matches is the token cut off.

    Returns:
        The software name without version, or `software` unchanged when no
        version could be identified.
    """
    if "." not in software and " " not in software:
        print(f"WARNING: software='{software}' does not contain a version number.")
        return software

    # Cut off trailing remarks
    temp = software
    if ";" in software:
        temp = software.split(";")[0]
    elif "," in software:
        temp = software.split(",")[0]
    elif " - " in software:
        temp = software.split(" - ")[0]

    # Look for the separator in the shortened string that is actually split;
    # a separator that only occurs in the dropped remark must not decide how
    # the version token is located.
    if " " in temp:
        version = temp.split(" ")[-1]
    elif "/" in temp:
        version = temp.split("/")[-1]
    elif "-" in temp:
        version = temp.split("-")[-1]
    else:
        # No common separator found, nothing to remove
        return software

    match = None
    for pattern in patterns:
        match = pattern.match(version)
        if match is not None:
            break

    if match is None:
        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
        return software

    # Drop the version token together with the separator in front of it
    end = len(temp) - len(version) - 1
    software = temp[0:end].strip()

    if " version" in software:
        software = strip_until(software, " version")

    return software
def strip_powered_by(software: str) -> str:
    """Return what follows "powered by " in `software`, cut at the first " - ".

    Returns:
        The stripped remainder, or `software` unchanged (after a warning) when
        the marker is not present.

    Raises:
        Exception: If `software` is empty.
    """
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise Exception("Parameter 'software' is empty")

    marker = "powered by "

    # Test the very string that is sliced on: checking for "powered by"
    # (without the space) let find() return -1 and slice at a bogus offset.
    start = software.find(marker)
    if start < 0:
        print(f"WARNING: Cannot find 'powered by' in '{software}'!")
        return software

    software = software[start + len(marker):].strip()

    # Only delegate when there is something to cut; strip_until() warns when
    # the separator is missing and raises on an empty string.
    if " - " in software:
        software = strip_until(software, " - ")

    return software
def strip_hosted_on(software: str) -> str:
    """Return what precedes "hosted on " in `software`, cut at the first " - ".

    Returns:
        The stripped leading part, or `software` unchanged (after a warning)
        when the marker is not present.

    Raises:
        Exception: If `software` is empty.
    """
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise Exception("Parameter 'software' is empty")

    end = software.find("hosted on ")
    if end < 0:
        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
        return software

    # Was `software[0, start]`: a tuple index on a str using an undefined
    # name. A slice up to the marker position is what is needed.
    software = software[0:end].strip()

    # Only delegate when there is something to cut; strip_until() warns when
    # the separator is missing and raises on an empty string.
    if " - " in software:
        software = strip_until(software, " - ")

    return software
def strip_until(software: str, until: str) -> str:
    """Cut `software` at the first occurrence of `until` and strip whitespace.

    Returns:
        The part before `until`. `software` is returned unchanged when `until`
        does not occur (a warning is printed) or occurs at position 0.

    Raises:
        Exception: If `software` or `until` is empty.
    """
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise Exception("Parameter 'software' is empty")
    elif until == "":
        print("ERROR: Bad method call, 'until' is empty")
        raise Exception("Parameter 'until' is empty")

    # One search answers both "is it there" and "where"
    end = software.find(until)
    if end < 0:
        print(f"WARNING: Cannot find '{until}' in '{software}'!")
        return software

    # NOTE(review): the guard line before the slice is not visible in the
    # reviewed excerpt; `end > 0` is assumed so that a leading match does not
    # produce an empty name - confirm against the original.
    if end > 0:
        software = software[0:end].strip()

    return software
# Checks `domain` against the entries of the module-level `blacklist`.
# Raises ValueError when `domain` is not a str; the second message shows an
# empty `domain` is rejected the same way.
# NOTE(review): the loop body and the return statement are not visible in this
# excerpt, so how an entry is matched against `domain` could not be confirmed.
326 def is_blacklisted(domain: str) -> bool:
327 if type(domain) != str:
328 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
330 raise ValueError(f"Parameter 'domain' cannot be empty")
333 for peer in blacklist:
def remove_pending_error(domain: str):
    """Forget the pending error recorded for `domain`, if there is one.

    Raises:
        ValueError: If `domain` is not a non-empty str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    # Prevent updating any pending errors, nodeinfo was found.
    # assumes pending_errors is a dict keyed by domain - TODO confirm
    pending_errors.pop(domain, None)
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoded `domain`.

    Raises:
        ValueError: If `domain` is not a non-empty str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    return hashlib.sha256(domain.encode("utf-8")).hexdigest()
def update_last_blocked(domain: str):
    """Set "last_blocked" of `domain` to the current time and run its pending update.

    Raises:
        ValueError: If `domain` is not a non-empty str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    instances.set("last_blocked", domain, time.time())

    # Run the pending update
    instances.update_instance_data(domain)
# Writes one row into the `error_log` table for `domain` and then purges rows
# older than config.get("error_log_cleanup") seconds.
# `response` may be an exception (stored as its str() form), a plain string,
# or an object exposing `status_code`.
# Raises ValueError when `domain` is not a str; the second message shows an
# empty `domain` is rejected the same way.
# NOTE(review): the parameter lists of both INSERT statements and the end of
# the exception handler are not visible in this excerpt.
377 def log_error(domain: str, response: requests.models.Response):
378 # DEBUG: print("DEBUG: domain,response[]:", domain, type(response))
379 if type(domain) != str:
380 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
382 raise ValueError(f"Parameter 'domain' cannot be empty")
385 # DEBUG: print("DEBUG: BEFORE response[]:", type(response))
# NOTE(review): json.decoder.JSONDecodeError derives from ValueError, so the
# second isinstance() test can never add anything to the first.
386 if isinstance(response, BaseException) or isinstance(response, json.decoder.JSONDecodeError):
387 response = str(response)
389 # DEBUG: print("DEBUG: AFTER response[]:", type(response))
390 if type(response) is str:
# String errors carry no HTTP status, the fixed code 999 is stored instead.
391 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[
# Otherwise the status code is taken from the response object.
397 cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[
399 response.status_code,
404 # Cleanup old entries
405 # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})")
406 cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")])
407 except BaseException as e:
408 print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
411 # DEBUG: print("DEBUG: EXIT!")
def update_last_error(domain: str, response: requests.models.Response):
    """Record the last error of `domain` and write it to the error log.

    Args:
        domain: Instance the error belongs to.
        response: An exception, an error message string, or a response object
            exposing `status_code` and `reason`.

    Raises:
        ValueError: If `domain` is not a non-empty str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    if isinstance(response, BaseException):
        # Was f"{type}:str(response)", which stored the literal text
        # "<class 'type'>:str(response)" instead of the actual error.
        response = f"{type(response)}:{str(response)}"

    if isinstance(response, str):
        # No HTTP status is available for plain messages, 999 marks them
        instances.set("last_status_code", domain, 999)
        instances.set("last_error_details", domain, response)
    else:
        instances.set("last_status_code", domain, response.status_code)
        instances.set("last_error_details", domain, response.reason)

    # Run the pending update
    instances.update_instance_data(domain)

    log_error(domain, response)
def update_last_instance_fetch(domain: str):
    """Set "last_instance_fetch" of `domain` to the current time and run its pending update.

    Raises:
        ValueError: If `domain` is not a non-empty str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    instances.set("last_instance_fetch", domain, time.time())

    # Run the pending update
    instances.update_instance_data(domain)
def update_last_nodeinfo(domain: str):
    """Set "last_nodeinfo" and "last_updated" of `domain` to now and run its pending update.

    Raises:
        ValueError: If `domain` is not a non-empty str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    # Read the clock once so both fields carry the very same timestamp
    now = time.time()
    instances.set("last_nodeinfo", domain, now)
    instances.set("last_updated", domain, now)

    # Run the pending update
    instances.update_instance_data(domain)
# Fetches the peers of `domain`, picking the API by `software`:
#   "misskey"  - POSTs to /api/federation/instances page by page
#   "lemmy"    - GETs /api/v3/site and reads "federated_instances"
#   "peertube" - GETs /api/v1/server/followers and .../following
#   otherwise  - GETs get_peers_url, with /api/v3/site as fallback
# Every branch stores the number of peers as "total_peers" and calls
# update_last_instance_fetch() before handing back `peers`.
# Raises ValueError when `domain` is not a str or `software` is neither a str
# nor None; the second message shows an empty `domain` is rejected as well.
# NOTE(review): loop headers, the POST payloads and several branch bodies are
# not visible in this excerpt, so they are not described here.
475 def get_peers(domain: str, software: str) -> list:
476 # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software={software} - CALLED!")
477 if type(domain) != str:
478 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
480 raise ValueError(f"Parameter 'domain' cannot be empty")
481 elif type(software) != str and software != None:
482 raise ValueError(f"software[]={type(software)} is not 'str'")
486 if software == "misskey":
487 # DEBUG: print(f"DEBUG: domain='{domain}' is misskey, sending API POST request ...")
# Page size for the paged requests below
489 step = config.get("misskey_limit")
491 # iterating through all "suspended" (follow-only in its terminology)
492 # instances page-by-page, since that troonware doesn't support
493 # sending them all at once
495 # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...")
497 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
505 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
514 # DEBUG: print(f"DEBUG: fetched()={len(fetched)}")
515 if len(fetched) == 0:
516 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
# NOTE(review): a short page advances the offset by (limit - rows received);
# presumably the number of rows received was meant - confirm.
518 elif len(fetched) != config.get("misskey_limit"):
519 # DEBUG: print(f"DEBUG: Fetched '{len(fetched)}' row(s) but expected: '{config.get('misskey_limit')}'")
520 offset = offset + (config.get("misskey_limit") - len(fetched))
522 # DEBUG: print("DEBUG: Raising offset by step:", step)
523 offset = offset + step
526 # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]={type(fetched)}")
# An error object from the API is recorded through update_last_error()
527 if isinstance(fetched, dict) and "error" in fetched and "message" in fetched["error"]:
528 print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
529 update_last_error(domain, fetched["error"]["message"])
534 # DEBUG: print(f"DEBUG: row():{len(row)}")
535 if not "host" in row:
536 print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
538 elif type(row["host"]) != str:
539 print(f"WARNING: row[host][]={type(row['host'])} is not 'str'")
541 elif is_blacklisted(row["host"]):
542 # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}'")
# Hosts already collected are counted so a repeated page can be detected
544 elif row["host"] in peers:
545 # DEBUG: print(f"DEBUG: Not adding row[host]='{row['host']}', already found.")
546 already = already + 1
549 # DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'")
550 peers.append(row["host"])
# A page consisting only of known hosts means the remote repeats itself
552 if already == len(fetched):
553 print(f"WARNING: Host returned same set of '{already}' instances, aborting loop!")
556 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
557 instances.set("total_peers", domain, len(peers))
559 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
560 update_last_instance_fetch(domain)
562 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
564 elif software == "lemmy":
565 # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...")
567 response = get_response(domain, "/api/v3/site", api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
569 data = json_from_response(response)
571 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'")
572 if not response.ok or response.status_code >= 400:
573 print("WARNING: Could not reach any JSON API:", domain)
574 update_last_error(domain, response)
575 elif response.ok and isinstance(data, list):
576 # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
578 elif "federated_instances" in data:
579 # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
580 peers = peers + add_peers(data["federated_instances"])
581 # DEBUG: print("DEBUG: Added instance(s) to peers")
583 print("WARNING: JSON response does not contain 'federated_instances':", domain)
584 update_last_error(domain, response)
# NOTE(review): unlike the generic branch at the end of this function, the
# exception is only printed here and not passed to update_last_error().
586 except BaseException as e:
587 print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
589 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
590 instances.set("total_peers", domain, len(peers))
592 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
593 update_last_instance_fetch(domain)
595 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
597 elif software == "peertube":
598 # DEBUG: print(f"DEBUG: domain='{domain}' is a PeerTube, fetching JSON ...")
601 for mode in ["followers", "following"]:
602 # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'")
# NOTE(review): this path literal has no f-prefix, so "{mode}" and "{start}"
# are sent verbatim - an f-string was presumably intended.
605 response = get_response(domain, "/api/v1/server/{mode}?start={start}&count=100", headers, (config.get("connection_timeout"), config.get("read_timeout")))
607 data = json_from_response(response)
608 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',data[]='{type(data)}'")
609 if response.ok and isinstance(data, dict):
610 # DEBUG: print("DEBUG: Success, data:", len(data))
612 # DEBUG: print(f"DEBUG: Found {len(data['data'])} record(s).")
613 for record in data["data"]:
614 # DEBUG: print(f"DEBUG: record()={len(record)}")
# Each record keeps the remote host under the key named like the mode
615 if mode in record and "host" in record[mode]:
616 # DEBUG: print(f"DEBUG: Found host={record[mode]['host']}, adding ...")
617 peers.append(record[mode]["host"])
619 print(f"WARNING: record from '{domain}' has no '{mode}' or 'host' record: {record}")
# Fewer than the requested 100 records marks the last page
621 if len(data["data"]) < 100:
622 # DEBUG: print("DEBUG: Reached end of JSON response:", domain)
625 # Continue with next row
628 except BaseException as e:
629 print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
631 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
632 instances.set("total_peers", domain, len(peers))
634 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
635 update_last_instance_fetch(domain)
637 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
# Generic branch for any other software
640 # DEBUG: print(f"DEBUG: Fetching get_peers_url='{get_peers_url}' from '{domain}' ...")
642 response = get_response(domain, get_peers_url, api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
644 data = json_from_response(response)
646 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
647 if not response.ok or response.status_code >= 400:
648 # DEBUG: print(f"DEBUG: Was not able to fetch '{get_peers_url}', trying alternative ...")
649 response = get_response(domain, "/api/v3/site", api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
651 data = json_from_response(response)
652 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
653 if not response.ok or response.status_code >= 400:
654 print("WARNING: Could not reach any JSON API:", domain)
655 update_last_error(domain, response)
656 elif response.ok and isinstance(data, list):
657 # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
659 elif "federated_instances" in data:
660 # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
661 peers = peers + add_peers(data["federated_instances"])
662 # DEBUG: print("DEBUG: Added instance(s) to peers")
664 print("WARNING: JSON response does not contain 'federated_instances':", domain)
665 update_last_error(domain, response)
667 # DEBUG: print("DEBUG: Querying API was successful:", domain, len(data))
670 except BaseException as e:
671 print("WARNING: Some error during get():", domain, e)
672 update_last_error(domain, e)
674 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
675 instances.set("total_peers", domain, len(peers))
677 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
678 update_last_instance_fetch(domain)
680 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = None) -> dict:
    """POST `parameter` to https://`domain``path` and return the decoded JSON reply.

    Args:
        domain: Host to contact.
        path: Request path, appended to the host as-is.
        parameter: Request body, an already encoded string.
        extra_headers: Optional headers merged over `api_headers`.

    Returns:
        Whatever json_from_response() yields for the reply; an empty dict when
        the request itself failed with an exception.

    Raises:
        ValueError: If `domain` or `path` is not a non-empty str, or if
            `parameter` is not a str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' cannot be empty")
    elif not isinstance(parameter, str):
        raise ValueError(f"parameter[]={type(parameter)} is not 'str'")

    # None instead of a shared `{}` default; the merge below needs a mapping
    if extra_headers is None:
        extra_headers = {}

    data = {}
    try:
        # NOTE(review): the body argument is not visible in the reviewed
        # excerpt; `data=parameter` is assumed - confirm against the original.
        response = reqto.post(
            f"https://{domain}{path}",
            data=parameter,
            headers={**api_headers, **extra_headers},
            timeout=(config.get("connection_timeout"), config.get("read_timeout"))
        )

        data = json_from_response(response)
        if not response.ok or response.status_code >= 400:
            print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',parameter()={len(parameter)},response.status_code='{response.status_code}',data[]='{type(data)}'")
            update_last_error(domain, response)

    except BaseException as e:
        print(f"WARNING: Some error during post(): domain='{domain}',path='{path}',parameter()={len(parameter)},exception[{type(e)}]:'{str(e)}'")

    return data
# Fetches nodeinfo for `domain`: first through fetch_wellknown_nodeinfo(),
# then by trying the paths in `request_paths` one after another.
# A dict reply marks the domain as detection_mode "STATIC_CHECK" and stores
# the request path as "nodeinfo_url"; failed replies and exceptions are passed
# to update_last_error().
# Raises ValueError when `domain` is not a str or `path` is neither a str nor
# None; the second message shows an empty `domain` is rejected as well.
# NOTE(review): annotated `-> list`, but the success branch tests for a dict.
# NOTE(review): most entries of `request_paths` and the statements after
# several branches are not visible in this excerpt.
717 def fetch_nodeinfo(domain: str, path: str = None) -> list:
718 # DEBUG: print(f"DEBUG: domain='{domain}',path={path} - CALLED!")
719 if type(domain) != str:
720 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
722 raise ValueError(f"Parameter 'domain' cannot be empty")
723 elif type(path) != str and path != None:
724 raise ValueError(f"Parameter path[]={type(path)} is not 'str'")
726 # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
727 nodeinfo = fetch_wellknown_nodeinfo(domain)
729 # DEBUG: print(f"DEBUG: nodeinfo({len(nodeinfo)})={nodeinfo}")
# A non-empty result from the .well-known lookup is used as-is
730 if len(nodeinfo) > 0:
731 # DEBUG: print("DEBUG: nodeinfo()={len(nodeinfo))} - EXIT!")
735 "/nodeinfo/2.1.json",
737 "/nodeinfo/2.0.json",
744 for request in request_paths:
# NOTE(review): `path` is compared with a string built from `path` itself, so
# the two can never be equal for a non-empty `path`; the debug message below
# mentions `request`, so f"https://{domain}{request}" was presumably intended.
745 if path != None and path != "" and path != f"https://{domain}{path}":
746 # DEBUG: print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
750 # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
751 response = get_response(domain, request, api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
753 data = json_from_response(response)
754 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
755 if response.ok and isinstance(data, dict):
756 # DEBUG: print("DEBUG: Success:", request)
757 instances.set("detection_mode", domain, "STATIC_CHECK")
758 instances.set("nodeinfo_url" , domain, request)
760 elif response.ok and isinstance(data, list):
761 print(f"UNSUPPORTED: domain='{domain}' returned a list: '{data}'")
763 elif not response.ok or response.status_code >= 400:
764 print("WARNING: Failed fetching nodeinfo from domain:", domain)
765 update_last_error(domain, response)
768 except BaseException as e:
769 # DEBUG: print("DEBUG: Cannot fetch API request:", request)
770 update_last_error(domain, e)
773 # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!")
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Discover and fetch nodeinfo of `domain` through /.well-known/nodeinfo.

    The discovery document's "links" are walked; the first link whose "rel"
    is listed in `nodeinfo_identifier` and whose target returns a JSON object
    ends the search. That domain is then marked with detection_mode
    "AUTO_DISCOVERY" and the link is stored as its "nodeinfo_url".

    Returns:
        The JSON decoded last; an empty dict when the first request failed
        with an exception.

    Raises:
        ValueError: If `domain` is not a non-empty str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")

    data = {}

    try:
        response = get_response(domain, "/.well-known/nodeinfo", api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))

        data = json_from_response(response)
        if response.ok and isinstance(data, dict):
            # Keep the discovery document apart, `data` is reused below
            nodeinfo = data
            if "links" in nodeinfo:
                for link in nodeinfo["links"]:
                    if not isinstance(link, dict) or "rel" not in link or "href" not in link:
                        # One malformed entry must not abort the discovery
                        print("WARNING: Malformed link entry in nodeinfo:", domain, link)
                        continue

                    if link["rel"] in nodeinfo_identifier:
                        response = get_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout")))

                        data = json_from_response(response)
                        if response.ok and isinstance(data, dict):
                            instances.set("detection_mode", domain, "AUTO_DISCOVERY")
                            instances.set("nodeinfo_url", domain, link["href"])
                            break
                    else:
                        print("WARNING: Unknown 'rel' value:", domain, link["rel"])
            else:
                print("WARNING: nodeinfo does not contain 'links':", domain)

    except BaseException as e:
        print("WARNING: Failed fetching .well-known info:", domain)
        update_last_error(domain, e)

    return data
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Guess the software of `domain` from the HTML served at `path`.

    The page's <meta name="generator"> is preferred, <meta
    property="og:site_name"> is the fallback. The value found is passed
    through tidyup_domain() and then has version numbers and "powered by",
    "hosted on", " by " and " see " decorations stripped.

    Returns:
        The software name, or None when nothing usable was found.

    Raises:
        ValueError: If `domain` or `path` is not a non-empty str.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' cannot be empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        # The message used to name 'domain' although 'path' is checked here
        raise ValueError("Parameter 'path' cannot be empty")

    software = None

    try:
        response = get_response(domain, path, headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.status_code < 300 and len(response.text) > 0:
            doc = bs4.BeautifulSoup(response.text, "html.parser")

            generator = doc.find("meta", {"name": "generator"})
            site_name = doc.find("meta", {"property": "og:site_name"})

            if isinstance(generator, bs4.element.Tag):
                software = tidyup_domain(generator.get("content"))
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set("detection_mode", domain, "GENERATOR")
                remove_pending_error(domain)
            elif isinstance(site_name, bs4.element.Tag):
                # Was assigned to the misspelled name `sofware`, which left
                # `software` unset in this branch.
                software = tidyup_domain(site_name.get("content"))
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set("detection_mode", domain, "SITE_NAME")
                remove_pending_error(domain)

    except BaseException as e:
        update_last_error(domain, e)

    if isinstance(software, str):
        if software == "":
            # An empty value means nothing was detected
            software = None
        elif "." in software or " " in software:
            software = remove_version(software)

    if isinstance(software, str):
        if " powered by " in software:
            software = remove_version(strip_powered_by(software))
        elif " hosted on " in software:
            software = remove_version(strip_hosted_on(software))
        elif " by " in software:
            software = strip_until(software, " by ")
        elif " see " in software:
            software = strip_until(software, " see ")

    return software
# determine_software(domain, path=None) -> str
#
# Detects which software a domain runs. The visible flow is:
#   * wrong parameter types are rejected with ValueError (path may be None),
#   * fetch_nodeinfo(domain, path) is asked for the nodeinfo document,
#   * when no usable dict comes back, or the JSON is an error / "message"
#     payload, the message is recorded through update_last_error() and the
#     result of fetch_generator_from_path(domain) is returned instead,
#   * otherwise data["software"]["name"] is cleaned with tidyup_domain(),
#     known forks are folded into a family name (visible here: hometown/ecko
#     become "mastodon") and decorations are stripped ("/" and "|" separated
#     parts, "powered by", " by ", " see ", version numbers).
#
# NOTE(review): this listing does not show every statement of the function
# (e.g. the empty-domain guard, some assignments and the return statements);
# verify any change against the complete file.
893 def determine_software(domain: str, path: str = None) -> str:
894 # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
895 if type(domain) != str:
896 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
898 raise ValueError(f"Parameter 'domain' cannot be empty")
899 elif type(path) != str and path != None:
900 raise ValueError(f"Parameter path[]={type(path)} is not 'str'")
902 # DEBUG: print("DEBUG: Determining software for domain,path:", domain, path)
905 # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
906 data = fetch_nodeinfo(domain, path)
908 # DEBUG: print("DEBUG: data[]:", type(data))
# Anything but a non-empty dict means nodeinfo was unusable: use the generator.
909 if not isinstance(data, dict) or len(data) == 0:
910 # DEBUG: print("DEBUG: Could not determine software type:", domain)
911 return fetch_generator_from_path(domain)
913 # DEBUG: print("DEBUG: data():", len(data), data)
# Error-shaped JSON: remember the message first, then try the generator.
914 if "status" in data and data["status"] == "error" and "message" in data:
915 print("WARNING: JSON response is an error:", data["message"])
916 update_last_error(domain, data["message"])
917 return fetch_generator_from_path(domain)
918 elif "message" in data:
919 print("WARNING: JSON response contains only a message:", data["message"])
920 update_last_error(domain, data["message"])
921 return fetch_generator_from_path(domain)
# A nodeinfo document without software.name is handled like missing nodeinfo.
922 elif "software" not in data or "name" not in data["software"]:
923 # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
924 software = fetch_generator_from_path(domain)
926 # DEBUG: print(f"DEBUG: Generator for domain='{domain}' is: {software}, EXIT!")
929 software = tidyup_domain(data["software"]["name"])
931 # DEBUG: print("DEBUG: sofware after tidyup_domain():", software)
932 if software in ["akkoma", "rebased"]:
933 # DEBUG: print("DEBUG: Setting pleroma:", domain, software)
935 elif software in ["hometown", "ecko"]:
936 # DEBUG: print("DEBUG: Setting mastodon:", domain, software)
937 software = "mastodon"
938 elif software in ["calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
939 # DEBUG: print("DEBUG: Setting misskey:", domain, software)
# find() > 0: the separator must exist and must not be the very first character.
941 elif software.find("/") > 0:
942 print("WARNING: Spliting of slash:", software)
# Keep only the part after the last slash.
943 software = software.split("/")[-1];
944 elif software.find("|") > 0:
945 print("WARNING: Spliting of pipe:", software)
# Keep only the part before the first pipe.
946 software = tidyup_domain(software.split("|")[0]);
947 elif "powered by" in software:
948 # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
949 software = strip_powered_by(software)
950 elif type(software) is str and " by " in software:
951 # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
952 software = strip_until(software, " by ")
953 elif type(software) is str and " see " in software:
954 # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
955 software = strip_until(software, " see ")
957 # DEBUG: print(f"DEBUG: software[]={type(software)}")
959 print("WARNING: tidyup_domain() left no software name behind:", domain)
962 # DEBUG: print(f"DEBUG: software[]={type(software)}")
963 if str(software) == "":
964 # DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...")
965 software = fetch_generator_from_path(domain)
# NOTE(review): str(None) is "None", so a None value passes the length test and
# the `in software` membership tests below would then raise TypeError. Whether
# software can be None here depends on lines not shown - TODO confirm.
966 elif len(str(software)) > 0 and ("." in software or " " in software):
967 # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
968 software = remove_version(software)
970 # DEBUG: print(f"DEBUG: software[]={type(software)}")
971 if type(software) is str and "powered by" in software:
972 # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
973 software = remove_version(strip_powered_by(software))
975 # DEBUG: print("DEBUG: Returning domain,software:", domain, software)
# update_block_reason(reason, blocker, blocked, block_level)
#
# Stores a block reason for an existing row of the `blocks` table. The SQL
# below sets `reason` and `last_seen` on the row matching
# blocker/blocked/block_level and only touches rows whose current reason is
# '' or 'unknown'.
#
# Raises ValueError when `reason` is neither str nor None, or when one of the
# other three parameters is not a str.
#
# NOTE(review): the statement executing the query, its bound values and
# whatever follows the ERROR print are not part of this listing - confirm them
# in the complete file.
# NOTE(review): `UPDATE ... LIMIT 1` is not accepted by every SQL engine
# (SQLite, for instance, needs a build with SQLITE_ENABLE_UPDATE_DELETE_LIMIT)
# - confirm against the database actually used.
978 def update_block_reason(reason: str, blocker: str, blocked: str, block_level: str):
979 # DEBUG: print(f"DEBUG: reason='{reason}',blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!")
980 if type(reason) != str and reason != None:
981 raise ValueError(f"Parameter reason[]='{type(reason)}' is not 'str'")
982 elif type(blocker) != str:
983 raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'")
984 elif type(blocked) != str:
985 raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not 'str'")
986 elif type(block_level) != str:
987 raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not 'str'")
989 # DEBUG: print("DEBUG: Updating block reason:", reason, blocker, blocked, block_level)
992 "UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason IN ('','unknown') LIMIT 1",
1002 # DEBUG: print(f"DEBUG: cursor.rowcount={cursor.rowcount}")
# rowcount == 0 means no stored row matched (or its reason was already set).
1003 if cursor.rowcount == 0:
1004 # DEBUG: print(f"DEBUG: Did not update any rows: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',reason='{reason}' - EXIT!")
# BaseException also covers KeyboardInterrupt and SystemExit, not only database errors.
1007 except BaseException as e:
1008 print(f"ERROR: failed SQL query: reason='{reason}',blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(e)}]:'{str(e)}'")
1011 # DEBUG: print("DEBUG: EXIT!")
# update_last_seen(blocker, blocked, block_level)
#
# Refreshes the `last_seen` column of the `blocks` row identified by
# blocker/blocked/block_level (see the SQL below).
#
# NOTE(review): no parameter validation is visible in this function, unlike
# its siblings; the execute call, its bound values and the code after the
# ERROR print are not part of this listing - confirm in the complete file.
1013 def update_last_seen(blocker: str, blocked: str, block_level: str):
1014 # DEBUG: print("DEBUG: Updating last_seen for:", blocker, blocked, block_level)
1017 "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
1026 # DEBUG: print(f"DEBUG: cursor.rowcount={cursor.rowcount}")
# rowcount == 0 means the block row does not exist.
1027 if cursor.rowcount == 0:
1028 # DEBUG: print(f"DEBUG: Did not update any rows: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}' - EXIT!")
# BaseException also covers KeyboardInterrupt and SystemExit, not only database errors.
1031 except BaseException as e:
1032 print(f"ERROR: failed SQL query: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(e)}]:'{str(e)}'")
1035 # DEBUG: print("DEBUG: EXIT!")
def is_instance_blocked(blocker: str, blocked: str, block_level: str) -> bool:
    """Tell whether a block row exists for the given triple.

    Looks up the `blocks` table for a row matching all three values.

    Args:
        blocker: Domain that issued the block; must be a non-empty str.
        blocked: Domain being blocked; must be a non-empty str.
        block_level: Block level name; must be a non-empty str.

    Returns:
        True when a matching row exists, False otherwise.

    Raises:
        ValueError: When a parameter is not a str or is empty.
    """
    # DEBUG: print(f"DEBUG: blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!")
    if not isinstance(blocker, str):
        raise ValueError(f"Parameter blocker[]={type(blocker)} is not of type 'str'")
    elif blocker == "":
        raise ValueError("Parameter 'blocker' cannot be empty")
    elif not isinstance(blocked, str):
        raise ValueError(f"Parameter blocked[]={type(blocked)} is not of type 'str'")
    elif blocked == "":
        raise ValueError("Parameter 'blocked' cannot be empty")
    elif not isinstance(block_level, str):
        raise ValueError(f"Parameter block_level[]={type(block_level)} is not of type 'str'")
    elif block_level == "":
        raise ValueError("Parameter 'block_level' cannot be empty")

    # One bound value per placeholder, in the order they appear in the query
    cursor.execute(
        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
        (blocker, blocked, block_level),
    )

    # fetchone() yields None when the query matched nothing
    is_blocked = cursor.fetchone() is not None

    # DEBUG: print(f"DEBUG: is_blocked='{is_blocked}' - EXIT!")
    return is_blocked
# block_instance(blocker, blocked, reason, block_level)
#
# Records a new block in the `blocks` table (see the INSERT below) after
# validating both domain names. The reason is passed through tidyup_reason()
# before it is logged and stored.
#
# Raises ValueError when blocker/blocked is not a str, is empty (per the
# messages; the guard lines themselves are not shown in this listing) or when
# the part before the first "/" fails validators.domain().
# Raises Exception when is_blacklisted() reports either domain as blacklisted.
#
# NOTE(review): no check of `block_level` is visible here. tidyup_reason() in
# this file rejects non-str input, so a None reason would raise unless a guard
# not shown in this listing prevents it - TODO confirm.
# NOTE(review): the execute call, its bound values and the code following the
# ERROR print are not part of this listing.
1066 def block_instance(blocker: str, blocked: str, reason: str, block_level: str):
1067 # DEBUG: print("DEBUG: blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level)
1068 if type(blocker) != str:
1069 raise ValueError(f"Parameter blocker[]={type(blocker)} is not 'str'")
1071 raise ValueError(f"Parameter 'blocker' cannot be empty")
# Only the host part (text before the first slash) is validated as a domain.
1072 elif not validators.domain(blocker.split("/")[0]):
1073 raise ValueError(f"Bad blocker='{blocker}'")
1074 elif type(blocked) != str:
1075 raise ValueError(f"Parameter blocked[]={type(blocked)} is not 'str'")
1077 raise ValueError(f"Parameter 'blocked' cannot be empty")
1078 elif not validators.domain(blocked.split("/")[0]):
1079 raise ValueError(f"Bad blocked='{blocked}'")
# Callers are expected to filter blacklisted domains themselves; reaching this
# point with one is treated as a programming error.
1080 elif is_blacklisted(blocker):
1081 raise Exception(f"blocker='{blocker}' is blacklisted but function invoked")
1082 elif is_blacklisted(blocked):
1083 raise Exception(f"blocked='{blocked}' is blacklisted but function invoked")
1086 # Maybe needs cleaning
1087 reason = tidyup_reason(reason)
1089 print(f"INFO: New block: blocker='{blocker}',blocked='{blocked}', reason='{reason}', block_level='{block_level}'")
1092 "INSERT INTO blocks (blocker, blocked, reason, block_level, first_seen, last_seen) VALUES(?, ?, ?, ?, ?, ?)",
# BaseException also covers KeyboardInterrupt and SystemExit, not only database errors.
1102 except BaseException as e:
1103 print(f"ERROR: failed SQL query: blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}',exception[{type(e)}]:'{str(e)}'")
1106 # DEBUG: print("DEBUG: EXIT!")
# is_instance_registered(domain) -> bool
#
# Answers whether `domain` is a known instance, using the "is_registered"
# cache. When that cache key does not exist yet, it is filled with the rows
# of `SELECT domain FROM instances`; the result itself comes from
# cache.sub_key_exists("is_registered", domain).
#
# Raises ValueError when `domain` is not a str (and, per the message, when it
# is empty - that guard line is not shown in this listing).
#
# NOTE(review): the code following the ERROR print and the return statement
# are not part of this listing - confirm in the complete file.
1108 def is_instance_registered(domain: str) -> bool:
1109 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
1110 if type(domain) != str:
1111 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1113 raise ValueError(f"Parameter 'domain' cannot be empty")
1115 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
# Lazily populate the cache on first use instead of querying per domain.
1116 if not cache.key_exists("is_registered"):
1117 # DEBUG: print(f"DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...")
1119 cursor.execute("SELECT domain FROM instances")
# Every fetched row is stored under the cache key with the value True.
1122 cache.set_all("is_registered", cursor.fetchall(), True)
1123 except BaseException as e:
1124 print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
1128 registered = cache.sub_key_exists("is_registered", domain)
1130 # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!")
# add_instance(domain, origin, originator, path=None)
#
# Registers a new instance. In the order visible below it:
#   * validates the arguments,
#   * detects the software through determine_software(domain, path),
#   * inserts a row into `instances` (see the INSERT below),
#   * marks the domain in the "is_registered" cache,
#   * flushes pending instance data and pending errors for the domain,
#   * calls update_last_nodeinfo(domain).
#
# Raises ValueError for wrong types, an empty originator, or a domain/origin
# whose part before the first "/" fails validators.domain(); `origin` may be
# None. Raises Exception when is_blacklisted(domain) is true.
#
# NOTE(review): the execute call with its bound values, the enclosing try and
# the code following the ERROR print are not part of this listing - confirm in
# the complete file.
1133 def add_instance(domain: str, origin: str, originator: str, path: str = None):
1134 # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',originator='{originator}',path='{path}' - CALLED!")
1135 if type(domain) != str:
1136 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1138 raise ValueError(f"Parameter 'domain' cannot be empty")
1139 elif type(origin) != str and origin != None:
1140 raise ValueError(f"origin[]={type(origin)} is not 'str'")
1141 elif type(originator) != str:
1142 raise ValueError(f"originator[]={type(originator)} is not 'str'")
1143 elif originator == "":
1144 raise ValueError(f"originator cannot be empty")
# Only the host part (text before the first slash) is validated as a domain.
1145 elif not validators.domain(domain.split("/")[0]):
1146 raise ValueError(f"Bad domain name='{domain}'")
1147 elif origin is not None and not validators.domain(origin.split("/")[0]):
1148 raise ValueError(f"Bad origin name='{origin}'")
1149 elif is_blacklisted(domain):
1150 raise Exception(f"domain='{domain}' is blacklisted, but method invoked")
1152 # DEBUG: print("DEBUG: domain,origin,originator,path:", domain, origin, originator, path)
1153 software = determine_software(domain, path)
1154 # DEBUG: print("DEBUG: Determined software:", software)
1156 print(f"INFO: Adding instance domain='{domain}' (origin='{origin}',software='{software}')")
1159 "INSERT INTO instances (domain, origin, originator, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
# Keep the registration cache in sync with the row just inserted.
1170 cache.set_sub_key("is_registered", domain, True)
1172 if instances.has_pending_instance_data(domain):
1173 # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...")
# Clear the stored status code and error details before the pending data is written.
1174 instances.set("last_status_code" , domain, None)
1175 instances.set("last_error_details", domain, None)
1176 instances.update_instance_data(domain)
1177 remove_pending_error(domain)
# Errors collected before the row existed are written now, then dropped from the queue.
1179 if domain in pending_errors:
1180 # DEBUG: print("DEBUG: domain has pending error being updated:", domain)
1181 update_last_error(domain, pending_errors[domain])
1182 remove_pending_error(domain)
1184 except BaseException as e:
1185 print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
1188 # DEBUG: print("DEBUG: Updating nodeinfo for domain:", domain)
1189 update_last_nodeinfo(domain)
1191 # DEBUG: print("DEBUG: EXIT!")
# send_bot_post(instance, blocks)
#
# Builds a plain-text status that lists the instances blocked by `instance`
# and targets f"{config.get('bot_instance')}/api/v1/statuses", authorised with
# the configured bot token. Every entry of `blocks` is read as a mapping with
# "blocked" and "reason" keys.
#
# NOTE(review): the request call itself and the return statement are not part
# of this listing - confirm in the complete file.
# NOTE(review): `blocks` is annotated as dict but is sliced and iterated like
# a sequence of dicts; the annotation looks wrong.
1193 def send_bot_post(instance: str, blocks: dict):
1194 # DEBUG: print(f"DEBUG: instance={instance},blocks()={len(blocks)} - CALLED!")
1195 message = instance + " has blocked the following instances:\n\n"
1198 if len(blocks) > 20:
# NOTE(review): [0 : 19] keeps 19 entries, while the notice appended below
# speaks of "the first 20 entries" - likely off by one.
1200 blocks = blocks[0 : 19]
1202 for block in blocks:
1203 if block["reason"] == None or block["reason"] == '':
1204 message = message + block["blocked"] + " with unspecified reason\n"
# Over-long reasons are cut and marked with an ellipsis. This also rewrites the
# caller's dict entry in place.
1206 if len(block["reason"]) > 420:
1207 block["reason"] = block["reason"][0:419] + "[…]"
# A zero-width space is put after every "@" - presumably so that no account
# gets mentioned by the post.
1209 message = message + block["blocked"] + ' for "' + block["reason"].replace("@", "@\u200b") + '"\n'
1212 message = message + "(the list has been truncated to the first 20 entries)"
# API headers plus the bearer token of the bot account.
1214 botheaders = {**api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}}
1217 f"{config.get('bot_instance')}/api/v1/statuses",
1220 "visibility" : config.get('bot_visibility'),
1221 "content_type": "text/plain"
# get_mastodon_blocks(domain) -> dict
#
# Scrapes the block lists published on a Mastodon instance's "/about/more"
# page. Every <h3> heading is normalised with tidyup_reason(), translated
# through `language_mapping` when listed there, and - when it names one of the
# collected categories - the rows of the next <table> (header row skipped) are
# stored as {"domain", "hash", "reason"} entries.
#
# The visible result mapping is:
#   "reject"         <- "Suspended servers"
#   "media_removal"  <- "Filtered media"
#   "followers_only" <- "Limited servers" + "Silenced servers"
#
# Raises ValueError when `domain` is not a str (and, per the message, when it
# is empty - that guard line is not shown in this listing).
#
# NOTE(review): parser arguments, the value returned after a fetch error and
# several structural lines are not part of this listing - confirm in the
# complete file.
1229 def get_mastodon_blocks(domain: str) -> dict:
1230 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
1231 if type(domain) != str:
1232 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1234 raise ValueError(f"Parameter 'domain' cannot be empty")
1236 # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
1238 "Suspended servers": [],
1239 "Filtered media" : [],
1240 "Limited servers" : [],
1241 "Silenced servers" : [],
1245 doc = bs4.BeautifulSoup(
1246 get_response(domain, "/about/more", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
1249 except BaseException as e:
1250 print("ERROR: Cannot fetch from domain:", domain, e)
1251 update_last_error(domain, e)
1254 for header in doc.find_all("h3"):
1255 header_text = tidyup_reason(header.text)
1257 # DEBUG: print(f"DEBUG: header_text='{header_text}'")
# Translate the heading into the wording used as category key.
1258 if header_text in language_mapping:
1259 # DEBUG: print(f"DEBUG: header_text='{header_text}'")
1260 header_text = language_mapping[header_text]
1262 print(f"WARNING: header_text='{header_text}' not found in language mapping table")
# NOTE(review): when only the lower-cased heading is a key, the
# blocks[header_text] lookup below would raise KeyError - TODO confirm intent.
1264 if header_text in blocks or header_text.lower() in blocks:
1265 # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
# [0] takes the first table after the heading, [1:] skips its header row.
1266 for line in header.find_all_next("table")[0].find_all("tr")[1:]:
1267 blocks[header_text].append(
1269 "domain": tidyup_domain(line.find("span").text),
# The first 9 characters of the title attribute are cut off - presumably a
# fixed label in front of the hash; verify against real markup.
1270 "hash" : tidyup_domain(line.find("span")["title"][9:]),
1271 "reason": tidyup_domain(line.find_all("td")[1].text),
1275 print(f"WARNING: header_text='{header_text}' not found in blocks()={len(blocks)}")
1277 # DEBUG: print("DEBUG: Returning blocks for domain:", domain)
1279 "reject" : blocks["Suspended servers"],
1280 "media_removal" : blocks["Filtered media"],
1281 "followers_only": blocks["Limited servers"] + blocks["Silenced servers"],
# get_friendica_blocks(domain) -> dict
#
# Scrapes the block list from a Friendica instance's "/friendica" page: the
# element with id "about_blocklist" is located and every row of its table
# (header row skipped) yields a {"domain", "reason"} entry taken from the
# first and second cell.
#
# Raises ValueError when `domain` is not a str (and, per the message, when it
# is empty - that guard line is not shown in this listing).
#
# NOTE(review): parser arguments, the values returned on a fetch error or a
# missing block list, and the final return are not part of this listing -
# confirm in the complete file.
1284 def get_friendica_blocks(domain: str) -> dict:
1285 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
1286 if type(domain) != str:
1287 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1289 raise ValueError(f"Parameter 'domain' cannot be empty")
1291 # DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain)
1295 doc = bs4.BeautifulSoup(
1296 get_response(domain, "/friendica", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
1299 except BaseException as e:
1300 print("WARNING: Failed to fetch /friendica from domain:", domain, e)
1301 update_last_error(domain, e)
1304 blocklist = doc.find(id="about_blocklist")
1306 # Prevents exceptions:
1307 if blocklist is None:
1308 # DEBUG: print("DEBUG: Instance has no block list:", domain)
# [1:] skips the table's header row.
1311 for line in blocklist.find("table").find_all("tr")[1:]:
1312 # DEBUG: print(f"DEBUG: line='{line}'")
1314 "domain": tidyup_domain(line.find_all("td")[0].text),
1315 "reason": tidyup_domain(line.find_all("td")[1].text)
1318 # DEBUG: print("DEBUG: Returning blocks() for domain:", domain, len(blocks))
# get_misskey_blocks(domain) -> dict
#
# Collects block information from a Misskey-style instance by POSTing to
# "/api/federation/instances" in pages of config.get("misskey_limit") rows.
# Two passes are visible: one gathering instances flagged "isSuspended" and
# one gathering instances flagged "isBlocked". Afterwards
# update_last_instance_fetch(domain) is called and the result maps
#   "reject"         <- blocked instances
#   "followers_only" <- suspended instances
#
# Raises ValueError when `domain` is not a str (and, per the message, when it
# is empty - that guard line is not shown in this listing).
#
# NOTE(review): the request payloads, loop headers, counters and break
# statements are not part of this listing - confirm in the complete file
# before changing the paging logic.
1323 def get_misskey_blocks(domain: str) -> dict:
1324 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
1325 if type(domain) != str:
1326 raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
1328 raise ValueError(f"Parameter 'domain' cannot be empty")
1330 # DEBUG: print("DEBUG: Fetching misskey blocks from domain:", domain)
1337 step = config.get("misskey_limit")
1339 # iterating through all "suspended" (follow-only in its terminology)
1340 # instances page-by-page, since the API doesn't support
1341 # sending them all at once
1343 # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...")
1345 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
1346 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
1355 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
1356 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
1361 "offset" : offset - 1
1366 # DEBUG: print("DEBUG: fetched():", len(fetched))
1367 if len(fetched) == 0:
1368 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
# NOTE(review): on a short page the offset advances by the shortfall
# (limit - rows fetched), not by the rows fetched - TODO confirm this is intended.
1370 elif len(fetched) != config.get("misskey_limit"):
1371 # DEBUG: print(f"DEBUG: Fetched '{len(fetched)}' row(s) but expected: '{config.get('misskey_limit')}'")
1372 offset = offset + (config.get("misskey_limit") - len(fetched))
1374 # DEBUG: print("DEBUG: Raising offset by step:", step)
1375 offset = offset + step
1378 for instance in fetched:
# NOTE(review): has_key() compares the stored "domain" strings with the whole
# `instance` object, so this duplicate check looks like it can never match;
# presumably the tidied instance["host"] was meant - TODO confirm.
1380 if instance["isSuspended"] and not has_key(blocks["suspended"], "domain", instance):
1382 blocks["suspended"].append(
1384 "domain": tidyup_domain(instance["host"]),
1385 # no reason field, nothing
1390 # DEBUG: print(f"DEBUG: count={count}")
1392 # DEBUG: print(f"DEBUG: API is no more returning new instances, aborting loop!")
1395 except BaseException as e:
1396 print("WARNING: Caught error, exiting loop:", domain, e)
1397 update_last_error(domain, e)
1402 # same procedure again for "blocked" instances (aka full suspend)
1405 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
1406 fetched = post_json_api(domain,"/api/federation/instances", json.dumps({
1415 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
1416 fetched = post_json_api(domain,"/api/federation/instances", json.dumps({
1421 "offset" : offset - 1
1426 # DEBUG: print("DEBUG: fetched():", len(fetched))
1427 if len(fetched) == 0:
1428 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
# NOTE(review): same shortfall-based offset arithmetic as in the first pass.
1430 elif len(fetched) != config.get("misskey_limit"):
1431 # DEBUG: print(f"DEBUG: Fetched '{len(fetched)}' row(s) but expected: '{config.get('misskey_limit')}'")
1432 offset = offset + (config.get("misskey_limit") - len(fetched))
1434 # DEBUG: print("DEBUG: Raising offset by step:", step)
1435 offset = offset + step
1438 for instance in fetched:
# NOTE(review): same string-versus-object comparison as in the first pass.
1440 if instance["isBlocked"] and not has_key(blocks["blocked"], "domain", instance):
1442 blocks["blocked"].append({
1443 "domain": tidyup_domain(instance["host"]),
1447 # DEBUG: print(f"DEBUG: count={count}")
1449 # DEBUG: print(f"DEBUG: API is no more returning new instances, aborting loop!")
1452 except BaseException as e:
1453 print("ERROR: Exception during POST:", domain, e)
1454 update_last_error(domain, e)
1458 # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
1459 update_last_instance_fetch(domain)
1461 # DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"]))
1463 "reject" : blocks["blocked"],
1464 "followers_only": blocks["suspended"]
def tidyup_reason(reason: str) -> str:
    """Normalise a block reason for storage and comparison.

    Surrounding whitespace is removed and every "â" (residue of a
    mis-decoded quotation mark) is replaced by a plain double quote.

    Args:
        reason: Raw reason text.

    Returns:
        The cleaned-up reason.

    Raises:
        ValueError: When `reason` is not a str.
    """
    # DEBUG: print(f"DEBUG: reason='{reason}' - CALLED!")
    if not isinstance(reason, str):
        raise ValueError(f"Parameter reason[]={type(reason)} is not expected")

    # Strip surrounding whitespace
    reason = reason.strip()

    # Replace the mis-decoded quote; a literal replacement needs no regex
    reason = reason.replace("â", "\"")

    ## DEBUG: print(f"DEBUG: reason='{reason}' - EXIT!")
    return reason
def tidyup_domain(domain: str) -> str:
    """Reduce a domain-like string to a bare, lower-case host name.

    Removes surrounding whitespace, a trailing dot, a trailing port number,
    an "http:"/"https:" prefix (with or without slashes), one trailing
    slash, a leading "@" and any "user@" part.

    Args:
        domain: Raw text, e.g. "https://Example.com:8080/".

    Returns:
        The cleaned-up name, e.g. "example.com".

    Raises:
        ValueError: When `domain` is not a str.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not expected")

    # All lower-case and strip spaces out + last dot
    domain = domain.lower().strip().rstrip(".")

    # No port number
    domain = re.sub(r":\d+$", "", domain)

    # No protocol, sometimes without the slashes
    domain = re.sub(r"^https?:/*", "", domain)

    # No trailing slash
    domain = re.sub(r"/$", "", domain)

    # A port hidden behind the trailing slash only becomes visible now
    domain = re.sub(r":\d+$", "", domain)

    # No leading @ sign
    domain = re.sub(r"^@", "", domain)

    # No individual users in block lists
    domain = re.sub(r"(.+)@", "", domain)

    # DEBUG: print(f"DEBUG: domain='{domain}' - EXIT!")
    return domain
def json_from_response(response: requests.models.Response) -> list:
    """Decode the JSON body of a response, tolerating empty or broken bodies.

    Args:
        response: The response object to decode.

    Returns:
        The decoded JSON document, or an empty list when the body is blank
        or cannot be decoded. The decoded value is whatever the body
        contains, so it may be a dict as well as a list.

    Raises:
        ValueError: When `response` is not a requests Response.
    """
    # DEBUG: print(f"DEBUG: response[]={type(response)} - CALLED!")
    if not isinstance(response, requests.models.Response):
        raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")

    data = list()
    if response.text.strip() != "":
        # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...")
        try:
            data = response.json()
        except ValueError:
            # Every JSON decode error (stdlib json, simplejson, requests'
            # own JSONDecodeError) derives from ValueError; keep the empty
            # default on purpose.
            pass

    # DEBUG: print(f"DEBUG: data[]={type(data)} - EXIT!")
    return data
# get_response(domain, path, headers, timeout) -> requests.models.Response
#
# Sends a GET request to f"https://{domain}{path}" through reqto.get() and
# hands back the response. The scheme is always https. A connection error is
# recorded for the domain through update_last_error().
#
# Raises ValueError when `domain` or `path` is not a str (and, per the
# messages, when either is empty - those guard lines are not shown in this
# listing).
#
# NOTE(review): the keyword arguments passed to reqto.get(), the statement
# following update_last_error() and the return are not part of this listing -
# confirm in the complete file.
1523 def get_response(domain: str, path: str, headers: dict, timeout: list) -> requests.models.Response:
1524 # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',headers()={len(headers)},timeout={timeout} - CALLED!")
1525 if type(domain) != str:
1526 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
1528 raise ValueError("Parameter 'domain' cannot be empty")
1529 elif type(path) != str:
1530 raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
1532 raise ValueError("Parameter 'path' cannot be empty")
1535 # DEBUG: print(f"DEBUG: Sending request to '{domain}{path}' ...")
1536 response = reqto.get(
# `path` is appended verbatim, so it has to start with "/" to form a valid URL.
1537 f"https://{domain}{path}",
# Only connection errors are handled here; other request failures propagate.
1541 except requests.exceptions.ConnectionError as e:
1542 # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(e)}]='{str(e)}'")
1543 update_last_error(domain, e)
1546 # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!")
def has_key(keys: list, search: str, value: any) -> bool:
    """Tell whether any dict in `keys` holds `value` under the key `search`.

    Entries are checked in order and the scan stops at the first match, so
    entries after a match are not validated.

    Args:
        keys: List of dicts to scan.
        search: Key to look up in every dict; must not be empty.
        value: Value to compare with (using ==).

    Returns:
        True when a matching entry was found, False otherwise (also for an
        empty list).

    Raises:
        ValueError: When `keys` is not a list, `search` is not a non-empty
            str, or a scanned entry is not a dict.
        KeyError: When a scanned entry lacks the key `search`.
    """
    # DEBUG: print(f"DEBUG: keys()={len(keys)},search='{search}',value[]='{type(value)}' - CALLED!")
    if not isinstance(keys, list):
        raise ValueError(f"Parameter keys[]='{type(keys)}' is not 'list'")
    elif not isinstance(search, str):
        raise ValueError(f"Parameter search[]='{type(search)}' is not 'str'")
    elif search == "":
        raise ValueError("Parameter 'search' cannot be empty")

    has = False

    # DEBUG: print(f"DEBUG: Checking keys()={len(keys)} ...")
    for key in keys:
        # DEBUG: print(f"DEBUG: key['{type(key)}']={key}")
        if not isinstance(key, dict):
            raise ValueError(f"key[]='{type(key)}' is not 'dict'")
        elif search not in key:
            raise KeyError(f"Cannot find search='{search}'")
        elif key[search] == value:
            has = True
            break

    # DEBUG: print(f"DEBUG: has={has} - EXIT!")
    return has
# find_domains(tag) -> list
#
# Extracts block entries from an HTML table element. For every <tr> that has
# a <td>, the first cell is cleaned with tidyup_domain() and the second with
# tidyup_reason(). Blacklisted and invalid domains are skipped with a warning;
# one known multi-domain cell ("gab.com/.ai, develop.gab.com") is split into
# separate entries.
#
# Raises ValueError when `tag` is not a bs4.element.Tag and KeyError when the
# tag contains no <tr> rows.
#
# NOTE(review): the list initialisation, the appended dicts (only some
# "domain" values are visible), the continue statements and the return are not
# part of this listing - confirm in the complete file.
1573 def find_domains(tag: bs4.element.Tag) -> list:
1574 # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!")
1575 if not isinstance(tag, bs4.element.Tag):
1576 raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")
# NOTE(review): this condition repeats the one above, so the branch can never
# be taken.
1577 elif not isinstance(tag, bs4.element.Tag):
1578 raise KeyError("Cannot find table with instances!")
1579 elif len(tag.select("tr")) == 0:
1580 raise KeyError("No table rows found in table!")
1583 for element in tag.select("tr"):
1584 # DEBUG: print(f"DEBUG: element[]={type(element)}")
# Rows without any <td> (such as header rows made of <th>) carry no entry.
1585 if not element.find("td"):
1586 # DEBUG: print("DEBUG: Skipping element, no <td> found")
1589 domain = tidyup_domain(element.find("td").text)
# NOTE(review): [1] raises IndexError for a row with a single <td>; no guard is
# visible in this listing.
1590 reason = tidyup_reason(element.findAll("td")[1].text)
1592 # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
1594 if is_blacklisted(domain):
1595 print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
# Special case: one cell that names several domains at once.
1597 elif domain == "gab.com/.ai, develop.gab.com":
1598 # DEBUG: print(f"DEBUG: Multiple domains detected in one row")
1600 "domain": "gab.com",
1608 "domain": "develop.gab.com",
1612 elif not validators.domain(domain):
1613 print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
1616 # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
1622 # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
def get_url(url: str, headers: dict, timeout: list) -> requests.models.Response:
    """Fetch an absolute URL by delegating to get_response().

    The URL is split with urlparse(); only its host name, path and query are
    forwarded. get_response() always builds an https URL from them, so the
    scheme, port and fragment of `url` are not used.

    Args:
        url: Absolute URL to fetch; must be a non-empty str.
        headers: HTTP headers handed to get_response().
        timeout: Timeout value(s) handed to get_response().

    Returns:
        The response returned by get_response().

    Raises:
        ValueError: When `url` is not a str or is empty; also raised by
            get_response() when no host name can be derived from `url`.
    """
    # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!")
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' cannot be empty")

    # DEBUG: print(f"DEBUG: Parsing url='{url}'")
    components = urlparse(url)

    # urlparse() yields an empty path for URLs like "https://example.com",
    # which get_response() rejects - request the root document instead
    path = components.path or "/"

    # Invoke other function, avoid trailing ?
    # DEBUG: print(f"DEBUG: components[{type(components)}]={components}")
    if components.query != "":
        path = f"{path}?{components.query}"

    response = get_response(components.hostname, path, headers, timeout)

    # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!")
    return response