1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
28 from urllib.parse import urlparse
30 from fba import blacklist
32 from fba import config
33 from fba import instances
35 from fba.federation import lemmy
36 from fba.federation import misskey
37 from fba.federation import peertube
# Pending errors, keyed by domain, that still need to be written to the database
pending_errors = {}

# "rel" identifiers (no real URLs)
nodeinfo_identifier = [
    "https://nodeinfo.diaspora.software/ns/schema/2.1",
    "https://nodeinfo.diaspora.software/ns/schema/2.0",
    "https://nodeinfo.diaspora.software/ns/schema/1.1",
    "https://nodeinfo.diaspora.software/ns/schema/1.0",
    "http://nodeinfo.diaspora.software/ns/schema/2.1",
    "http://nodeinfo.diaspora.software/ns/schema/2.0",
    "http://nodeinfo.diaspora.software/ns/schema/1.1",
    "http://nodeinfo.diaspora.software/ns/schema/1.0",
]

# HTTP headers for non-API requests
headers = {
    "User-Agent": config.get("useragent"),
}

# HTTP headers for API requests
api_headers = {
    "User-Agent": config.get("useragent"),
    "Content-Type": "application/json",
}

# Database connection and cursor used by the functions of this module
connection = sqlite3.connect("blocks.db")
cursor = connection.cursor()

# Pattern instances for version numbers. Raw strings are used so that the
# regex escapes (\. \d \+) are not parsed as (invalid) string escapes.
patterns = [
    # semantic version number (with optional v|V prefix)
    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"),
    # non-semantic, e.g. 1.2.3.4
    re.compile(r"^(?P<version>v|V{0,1})(\.{0,1})(?P<major>0|[1-9]\d*)\.(?P<minor>0+|[1-9]\d*)(\.(?P<patch>0+|[1-9]\d*)(\.(?P<subpatch>0|[1-9]\d*))?)$"),
    # non-semantic, e.g. 2023.05[-dev]
    re.compile(r"^(?P<year>[1-9]{1}[0-9]{3})\.(?P<month>[0-9]{2})(-dev){0,1}$"),
    # non-semantic, e.g. abcdef0
    re.compile(r"^[a-f0-9]{7}$"),
]
82 ##### Other functions #####
def is_primitive(var: object) -> bool:
    """Tell whether *var* is None or exactly an int, str, float or bool.

    The exact type is compared, so instances of subclasses of these types
    are not reported as primitive.

    Args:
        var: Any value.

    Returns:
        True for None and for values whose type is int, str, float or bool.
    """
    # Identity check: "== None" would invoke a custom __eq__() of arbitrary
    # objects, which may answer True (or even raise).
    return var is None or type(var) in (int, str, float, bool)
def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
    """Register *domain* and every valid peer it reports.

    Args:
        domain: Domain whose peers are fetched.
        origin: Domain that led to *domain*, or None.
        software: Software name of *domain*; when None it is determined
            with determine_software().
        script: Identifier handed to instances.add() for new records.
        path: Optional path handed to determine_software() and
            instances.add().

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    # All parameters are validated up front. Previously the check of 'script'
    # sat in the same elif chain as the software detection and was skipped
    # whenever 'software' was None.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif origin is not None and not isinstance(origin, str):
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif software is not None and not isinstance(software, str):
        raise ValueError(f"Parameter software[]={type(software)} is not 'str'")
    elif not isinstance(script, str):
        raise ValueError(f"Parameter script[]={type(script)} is not 'str'")
    elif script == "":
        raise ValueError("Parameter 'script' is empty")

    if software is None:
        print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
        software = determine_software(domain, path)
        print(f"DEBUG: Determined software='{software}' for domain='{domain}'")

    if not instances.is_registered(domain):
        instances.add(domain, origin, script, path)

    peerlist = fetch_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending_instance_data(domain):
        # Flush data collected while detecting software / fetching peers
        instances.update_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
    for instance in peerlist:
        # Skip "None" entries as tidyup_domain() cannot parse them
        if instance is None:
            continue

        instance = tidyup_domain(instance)

        if instance == "":
            print("WARNING: Empty instance after tidyup_domain(), domain:", domain)
            continue
        elif not validators.domain(instance.split("/")[0]):
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
            continue
        elif blacklist.is_blacklisted(instance):
            continue

        try:
            if not instances.is_registered(instance):
                instances.add(instance, domain, script)
        except Exception as e:
            # One broken peer must not abort the whole run
            print(f"ERROR: instance='{instance}',exception[{type(e)}]:'{str(e)}'")
def add_peers(rows: dict) -> list:
    """Collect the tidied, non-blacklisted peers from a grouped peer record.

    Args:
        rows: Mapping that may contain the keys "linked", "allowed" and
            "blocked", each holding an iterable of domain strings or None.

    Returns:
        List of tidied peer domains in the order linked, allowed, blocked.
    """
    peers = []
    for key in ("linked", "allowed", "blocked"):
        # A missing key and an explicit null are both treated as "no peers"
        for peer in rows.get(key) or []:
            peer = tidyup_domain(peer)

            if blacklist.is_blacklisted(peer):
                continue

            peers.append(peer)

    return peers
def remove_version(software: str) -> str:
    """Strip a trailing version number from a software name.

    Everything after the first ";", "," or " - " is dropped first. The last
    token of the remainder (split by space, "/" or "-") is taken as version
    candidate and removed when it matches one of the module's version
    patterns.

    Args:
        software: Software name, possibly followed by a version number.

    Returns:
        The name without version, or *software* untouched when no version
        number was recognised.
    """
    if "." not in software and " " not in software:
        print(f"WARNING: software='{software}' does not contain a version number.")
        return software

    # Cut off trailing remarks
    temp = software
    for separator in (";", ",", " - "):
        if separator in software:
            temp = software.split(separator)[0]
            break

    # The separator has to be looked up in the shortened string, as that is
    # the string being split. Testing the full string picked the wrong
    # separator when only the cut-off remark contained it.
    for separator in (" ", "/", "-"):
        if separator in temp:
            version = temp.split(separator)[-1]
            break
    else:
        # No common separator found
        return software

    if not any(pattern.match(version) for pattern in patterns):
        print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
        return software

    # Remove the version including the one-character separator in front of it
    end = len(temp) - len(version) - 1
    software = temp[0:end].strip()

    if " version" in software:
        software = strip_until(software, " version")

    return software
def strip_powered_by(software: str) -> str:
    """Return what follows "powered by" in *software*.

    Anything from the first " - " onwards in that remainder is dropped.

    Args:
        software: Text such as "Site powered by Foo - bar".

    Returns:
        The name after "powered by" (may be empty when nothing follows), or
        *software* untouched when it does not contain "powered by".

    Raises:
        ValueError: If *software* is empty.
    """
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise ValueError("Parameter 'software' is empty")

    marker = "powered by"
    if marker not in software:
        print(f"WARNING: Cannot find 'powered by' in '{software}'!")
        return software

    # Split at the very marker that was tested for. Searching for
    # "powered by " (with trailing space) could miss it and slice from a
    # bogus -1 index.
    remainder = software.partition(marker)[2].strip()

    # Drop remarks that follow the name
    return remainder.split(" - ")[0].strip()
def strip_hosted_on(software: str) -> str:
    """Return what precedes "hosted on" in *software*.

    Anything from the first " - " onwards in that part is dropped.

    Args:
        software: Text such as "Foo hosted on example.com".

    Returns:
        The name before "hosted on" (may be empty when nothing precedes it),
        or *software* untouched when it does not contain "hosted on".

    Raises:
        ValueError: If *software* is empty.
    """
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise ValueError("Parameter 'software' is empty")

    marker = "hosted on"
    if marker not in software:
        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
        return software

    # Keep the part in front of the marker. The former "software[0, start]"
    # used an undefined name and a tuple index, so it could never work.
    name = software.partition(marker)[0].strip()

    # Drop remarks that follow the name
    return name.split(" - ")[0].strip()
def strip_until(software: str, until: str) -> str:
    """Cut *software* off at the first occurrence of *until*.

    Args:
        software: Text to shorten.
        until: Marker; it and everything after it is removed.

    Returns:
        The stripped text in front of the marker. *software* is returned
        untouched when the marker is absent or sits at the very beginning.

    Raises:
        ValueError: If *software* or *until* is empty.
    """
    if software == "":
        print("ERROR: Bad method call, 'software' is empty")
        raise ValueError("Parameter 'software' is empty")
    elif until == "":
        print("ERROR: Bad method call, 'until' is empty")
        raise ValueError("Parameter 'until' is empty")
    elif until not in software:
        print(f"WARNING: Cannot find '{until}' in '{software}'!")
        return software

    end = software.find(until)

    # A marker at index 0 would leave nothing behind, so it is ignored
    if end > 0:
        software = software[0:end].strip()

    return software
def remove_pending_error(domain: str):
    """Discard the pending error of *domain*, if there is one.

    Args:
        domain: Domain whose entry is removed from pending_errors.

    Raises:
        ValueError: If *domain* is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Prevent updating any pending errors, nodeinfo was found.
    # pop() with a default ignores a missing entry without hiding other errors.
    pending_errors.pop(domain, None)
def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of the UTF-8 encoded *domain*.

    Args:
        domain: Non-empty domain name.

    Returns:
        64-character lower-case hex digest.

    Raises:
        ValueError: If *domain* is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    return hashlib.sha256(domain.encode("utf-8")).hexdigest()
def log_error(domain: str, response: requests.models.Response):
    """Write an error for *domain* to table error_log and purge old rows.

    Args:
        domain: Domain the error belongs to.
        response: A failed response (its status_code and reason are stored),
            an exception or a plain message (stored with error code 999).

    Raises:
        ValueError: If *domain* is not a non-empty string.
        SystemExit: With code 255 when a database statement fails.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Exceptions (JSON decoding errors included) are stored by their message
    if isinstance(response, BaseException):
        response = str(response)

    try:
        if isinstance(response, str):
            cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[
                domain,
                response,
                time.time()
            ])
        else:
            cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[
                domain,
                response.status_code,
                response.reason,
                time.time()
            ])

        # Cleanup old entries
        cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")])
    except Exception as e:
        print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
        # NOTE(review): assumes the original aborted here with exit code 255 - confirm
        raise SystemExit(255) from e
def fetch_peers(domain: str, software: str) -> list:
    """Fetch the list of peers known to *domain*.

    misskey, lemmy and peertube are delegated to their own modules. For all
    other software "/api/v1/instance/peers" is queried first and
    "/api/v3/site" (key "federated_instances") is used as fallback.

    Args:
        domain: Domain to query.
        software: Software name of *domain*, or None.

    Returns:
        List of peers; empty when nothing could be fetched.

    Raises:
        ValueError: If a parameter has the wrong type or *domain* is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif software is not None and not isinstance(software, str):
        raise ValueError(f"software[]={type(software)} is not 'str'")

    if software == "misskey":
        return misskey.fetch_peers(domain)
    elif software == "lemmy":
        return lemmy.fetch_peers(domain)
    elif software == "peertube":
        return peertube.fetch_peers(domain)

    timeout = (config.get("connection_timeout"), config.get("read_timeout"))
    peers = []

    try:
        response = get_response(domain, "/api/v1/instance/peers", api_headers, timeout)
        data = json_from_response(response)

        if response.ok and isinstance(data, list):
            # Querying the API was successful
            peers = data
        elif response.ok:
            # Only a list is a usable peer list; anything else must not be
            # handed to callers that iterate over domains
            print("WARNING: JSON response is not a list of peers:", domain)
            instances.update_last_error(domain, response)
        else:
            # Was not able to fetch peers, trying alternative
            response = get_response(domain, "/api/v3/site", api_headers, timeout)
            data = json_from_response(response)

            if not response.ok:
                print("WARNING: Could not reach any JSON API:", domain)
                instances.update_last_error(domain, response)
            elif isinstance(data, dict) and "federated_instances" in data:
                peers = peers + add_peers(data["federated_instances"])
            else:
                print("WARNING: JSON response does not contain 'federated_instances':", domain)
                instances.update_last_error(domain, response)

    except Exception as e:
        print("WARNING: Some error during get():", domain, e)
        instances.update_last_error(domain, e)

    instances.set("total_peers", domain, len(peers))
    instances.update_last_instance_fetch(domain)

    return peers
def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = None) -> dict:
    """POST *parameter* to https://<domain><path> and return the parsed JSON.

    Args:
        domain: Domain to contact.
        path: Request path.
        parameter: Request body.
        extra_headers: Optional headers merged over the API headers.

    Returns:
        The decoded JSON response; an empty dict when the request raised.

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' cannot be empty")
    elif not isinstance(parameter, str):
        raise ValueError(f"parameter[]={type(parameter)} is not 'str'")

    # None instead of a shared mutable {} as default value
    if extra_headers is None:
        extra_headers = {}

    data = {}
    try:
        response = reqto.post(
            f"https://{domain}{path}",
            data=parameter,
            headers={**api_headers, **extra_headers},
            timeout=(config.get("connection_timeout"), config.get("read_timeout"))
        )

        data = json_from_response(response)
        if not response.ok:
            print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',parameter()={len(parameter)},response.status_code='{response.status_code}',data[]='{type(data)}'")
            instances.update_last_error(domain, response)

    except Exception as e:
        print(f"WARNING: Some error during post(): domain='{domain}',path='{path}',parameter()={len(parameter)},exception[{type(e)}]:'{str(e)}'")

    return data
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch the nodeinfo document of *domain*.

    Auto-discovery through fetch_wellknown_nodeinfo() is tried first. If it
    yields nothing, a fixed list of well-known locations is probed.

    Args:
        domain: Domain to query.
        path: Optional known nodeinfo location (relative path or absolute
            URL); when given, only the matching location is probed.

    Returns:
        JSON data of the last request that was made, or an empty dict.

    Raises:
        ValueError: If a parameter has the wrong type or *domain* is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif path is not None and not isinstance(path, str):
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    nodeinfo = fetch_wellknown_nodeinfo(domain)

    if len(nodeinfo) > 0:
        return nodeinfo

    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0",
        "/api/v1/instance"
    ]

    timeout = (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
    data = {}

    for request in request_paths:
        # Compare against the candidate. The former comparison of 'path'
        # with a string built from 'path' itself was always unequal, so
        # every candidate got skipped as soon as a path was given.
        if path is not None and path != "" and path not in (request, f"https://{domain}{request}"):
            continue

        try:
            response = get_response(domain, request, api_headers, timeout)
            data = json_from_response(response)

            if response.ok and isinstance(data, dict):
                instances.set("detection_mode", domain, "STATIC_CHECK")
                instances.set("nodeinfo_url" , domain, request)
                break
            elif response.ok and isinstance(data, list):
                print(f"UNSUPPORTED: domain='{domain}' returned a list: '{data}'")
            elif not response.ok:
                print("WARNING: Failed fetching nodeinfo from domain:", domain)
                instances.update_last_error(domain, response)

        except Exception as e:
            instances.update_last_error(domain, e)

    return data
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Discover and fetch nodeinfo through "/.well-known/nodeinfo".

    Args:
        domain: Domain to query.

    Returns:
        The nodeinfo document of the first supported link that could be
        fetched, or an empty dict when discovery failed.

    Raises:
        ValueError: If *domain* is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Kept apart from the intermediate JSON so that neither the discovery
    # index nor an error payload is ever returned as if it were nodeinfo
    nodeinfo = {}

    try:
        response = get_response(domain, "/.well-known/nodeinfo", api_headers, (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")))
        index = json_from_response(response)

        if response.ok and isinstance(index, dict):
            if "links" in index:
                for link in index["links"]:
                    if link["rel"] not in nodeinfo_identifier:
                        print("WARNING: Unknown 'rel' value:", domain, link["rel"])
                        continue

                    response = fetch_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
                    data = json_from_response(response)

                    if response.ok and isinstance(data, dict):
                        instances.set("detection_mode", domain, "AUTO_DISCOVERY")
                        instances.set("nodeinfo_url" , domain, link["href"])
                        nodeinfo = data
                        break
            else:
                print("WARNING: nodeinfo does not contain 'links':", domain)

    except Exception as e:
        print("WARNING: Failed fetching .well-known info:", domain)
        instances.update_last_error(domain, e)

    return nodeinfo
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Detect the software of *domain* from the HTML page at *path*.

    <meta name="generator"> is preferred, <meta property="og:site_name"> is
    the fallback. Version numbers and phrases such as "powered by" are
    stripped from the value that was found.

    Args:
        domain: Domain to query.
        path: Path of the HTML page to inspect.

    Returns:
        The software name, or None when nothing was detected.

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"path[]={type(path)} is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    software = None

    try:
        response = get_response(domain, path, headers, (config.get("connection_timeout"), config.get("read_timeout")))

        if response.ok and response.status_code < 300 and len(response.text) > 0:
            doc = bs4.BeautifulSoup(response.text, "html.parser")

            generator = doc.find("meta", {"name": "generator"})
            site_name = doc.find("meta", {"property": "og:site_name"})

            # Tags without a content attribute are of no use
            if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
                software = tidyup_domain(generator.get("content"))
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set("detection_mode", domain, "GENERATOR")
                remove_pending_error(domain)
            elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
                # Formerly assigned to a misspelled name, so the value was lost
                software = tidyup_domain(site_name.get("content"))
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set("detection_mode", domain, "SITE_NAME")
                remove_pending_error(domain)

    except Exception as e:
        instances.update_last_error(domain, e)

    if software == "":
        # Nothing usable was found
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        software = remove_version(software)

    if isinstance(software, str):
        if " powered by " in software:
            software = remove_version(strip_powered_by(software))
        elif " hosted on " in software:
            software = remove_version(strip_hosted_on(software))
        elif " by " in software:
            software = strip_until(software, " by ")
        elif " see " in software:
            software = strip_until(software, " see ")

    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine the software running on *domain*.

    The nodeinfo document is consulted first. Whenever it is missing,
    reports an error or lacks [software][name], the generator detection of
    fetch_generator_from_path() is used instead. Known forks are mapped to
    their parent software.

    Args:
        domain: Domain to inspect.
        path: Optional nodeinfo location handed to fetch_nodeinfo().

    Returns:
        The software name, or None when it could not be determined.

    Raises:
        ValueError: If a parameter has the wrong type or *domain* is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif path is not None and not isinstance(path, str):
        raise ValueError(f"Parameter path[]={type(path)} is not 'str'")

    data = fetch_nodeinfo(domain, path)

    if not isinstance(data, dict) or len(data) == 0:
        # Could not determine software type from nodeinfo
        return fetch_generator_from_path(domain)

    if "status" in data and data["status"] == "error" and "message" in data:
        print("WARNING: JSON response is an error:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        print("WARNING: JSON response contains only a message:", data["message"])
        instances.update_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif not isinstance(data.get("software"), dict) or not isinstance(data["software"].get("name"), str):
        # No usable [software][name], fetching / instead
        return fetch_generator_from_path(domain)

    software = tidyup_domain(data["software"]["name"])

    if software in ["akkoma", "rebased"]:
        software = "pleroma"
    elif software in ["hometown", "ecko"]:
        software = "mastodon"
    elif software in ["calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
        software = "misskey"
    elif software.find("/") > 0:
        print("WARNING: Spliting of slash:", software)
        # Formerly called the non-existing tidup_domain()
        software = tidyup_domain(software.split("/")[-1])
    elif software.find("|") > 0:
        print("WARNING: Spliting of pipe:", software)
        software = tidyup_domain(software.split("|")[0])
    elif "powered by" in software:
        software = strip_powered_by(software)
    elif " by " in software:
        software = strip_until(software, " by ")
    elif " see " in software:
        software = strip_until(software, " see ")

    if software == "":
        # Nothing left of the name: try the generator instead of carrying a
        # None into the string checks below
        print("WARNING: tidyup_domain() left no software name behind:", domain)
        software = fetch_generator_from_path(domain)
    elif "." in software or " " in software:
        software = remove_version(software)

    if isinstance(software, str) and "powered by" in software:
        software = remove_version(strip_powered_by(software))

    return software
def send_bot_post(instance: str, blocklist: list):
    """Post a summary of the blocks of *instance* through the bot account.

    Args:
        instance: Domain of the blocking instance.
        blocklist: Sequence of dicts with the keys "blocked" and "reason".
            Only the first 20 entries are posted.

    Returns:
        True once the status has been sent.

    Raises:
        ValueError: If a parameter has the wrong type or *instance* is empty.
    """
    # The former checks validated an undefined name 'domain' and demanded a
    # dict, although the entries are sliced and iterated like a list.
    if not isinstance(instance, str):
        raise ValueError(f"Parameter instance[]='{type(instance)}' is not 'str'")
    elif instance == "":
        raise ValueError("Parameter 'instance' is empty")
    elif not isinstance(blocklist, (list, tuple)):
        raise ValueError(f"Parameter blocklist[]='{type(blocklist)}' is not 'list'")

    message = instance + " has blocked the following instances:\n\n"

    truncated = len(blocklist) > 20
    if truncated:
        # [0:19] kept 19 entries only, the notice below promises 20
        blocklist = blocklist[:20]

    for block in blocklist:
        reason = block["reason"]

        if reason is None or reason == "":
            message = message + block["blocked"] + " with unspecified reason\n"
            continue

        # Shorten in a local copy, the caller's record stays untouched
        if len(reason) > 420:
            reason = reason[0:419] + "[…]"

        # A zero-width space after "@" prevents accidental mentions
        message = message + block["blocked"] + ' for "' + reason.replace("@", "@\u200b") + '"\n'

    if truncated:
        message = message + "(the list has been truncated to the first 20 entries)"

    botheaders = {**api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}}

    # NOTE(review): parts of this request were rebuilt from context (timeout
    # value, discarded JSON result) - confirm against the repository
    reqto.post(
        f"{config.get('bot_instance')}/api/v1/statuses",
        data={
            "status"      : message,
            "visibility"  : config.get('bot_visibility'),
            "content_type": "text/plain"
        },
        headers=botheaders,
        timeout=10
    ).json()

    return True
def fetch_friendica_blocks(domain: str) -> dict:
    """Scrape the block list from the "/friendica" page of *domain*.

    Args:
        domain: Domain to query.

    Returns:
        {"reject": [{"domain": ..., "reason": ...}, ...]}, or an empty dict
        when the page could not be fetched or contains no block list.

    Raises:
        ValueError: If *domain* is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    try:
        doc = bs4.BeautifulSoup(
            get_response(domain, "/friendica", headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
    except Exception as e:
        print("WARNING: Failed to fetch /friendica from domain:", domain, e)
        instances.update_last_error(domain, e)
        return {}

    blocklist = doc.find(id="about_blocklist")

    # Prevents exceptions: instance has no block list
    if blocklist is None:
        return {}

    table = blocklist.find("table")
    if table is None:
        return {}

    body = table.find("tbody")
    rows = body.find_all("tr") if body else table.find_all("tr")

    blocked = []
    for line in rows:
        cells = line.find_all("td")

        # Header rows (<th> only) and incomplete rows carry no block entry
        if len(cells) < 2:
            continue

        blocked.append({
            "domain": tidyup_domain(cells[0].text),
            "reason": tidyup_reason(cells[1].text)
        })

    return {
        "reject": blocked
    }
def tidyup_reason(reason: str) -> str:
    """Normalise a block reason.

    Surrounding whitespace is removed and every "â" is replaced by a
    double quote.

    Args:
        reason: Raw reason text.

    Returns:
        The cleaned-up reason.

    Raises:
        ValueError: If *reason* is not a string.
    """
    if not isinstance(reason, str):
        raise ValueError(f"Parameter reason[]={type(reason)} is not 'str'")

    # A literal replacement does not need the regex engine
    return reason.strip().replace("â", "\"")
def tidyup_domain(domain: str) -> str:
    """Reduce a raw domain, URL or user handle to a bare lower-case domain.

    Args:
        domain: Raw value, e.g. "https://Example.com/" or "@user@example.com".

    Returns:
        The tidied value.

    Raises:
        ValueError: If *domain* is not a string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")

    # All lower-case and strip spaces out + last dot
    domain = domain.lower().strip().rstrip(".")

    # No port number
    domain = re.sub(r"\:\d+$", "", domain)

    # No protocol, sometimes without the slashes
    domain = re.sub(r"^https?\:(\/*)", "", domain)

    # No trailing slash
    domain = re.sub(r"\/$", "", domain)

    # No leading @ sign
    domain = re.sub(r"^\@", "", domain)

    # No individual users in block lists
    domain = re.sub(r"(.+)\@", "", domain)

    # str.find() answers -1 (truthy) for "not found", so it must not be used
    # as a condition: the "/users/" branch could never be reached
    if "/profile/" in domain:
        domain = domain.split("/profile/")[0]
    elif "/users/" in domain:
        domain = domain.split("/users/")[0]

    return domain
def json_from_response(response: requests.models.Response) -> list:
    """Decode the JSON body of *response*.

    Args:
        response: Response object to decode.

    Returns:
        The decoded JSON value, or an empty list when the body is empty or
        is not valid JSON.

    Raises:
        ValueError: If *response* is not a requests Response.
    """
    if not isinstance(response, requests.models.Response):
        raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")

    data = []
    if response.text.strip() != "":
        try:
            data = response.json()
        except ValueError:
            # ValueError is the common base of the JSON decoding errors of
            # the standard library and of the other JSON backends requests
            # may use, so invalid JSON is ignored with any of them
            pass

    return data
def get_response(domain: str, path: str, headers: dict, timeout: list) -> requests.models.Response:
    """Send a GET request to https://<domain><path>.

    Args:
        domain: Host to contact.
        path: Request path, including the leading slash.
        headers: HTTP headers to send.
        timeout: Timeout value(s) handed to reqto.get().

    Returns:
        The response object.

    Raises:
        ValueError: If *domain* or *path* is not a non-empty string.
        requests.exceptions.ConnectionError: Re-raised after it has been
            recorded as last error of *domain*.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str):
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    try:
        response = reqto.get(
            f"https://{domain}{path}",
            headers=headers,
            timeout=timeout
        )
    except requests.exceptions.ConnectionError as e:
        instances.update_last_error(domain, e)
        # Bare raise keeps the original traceback
        raise

    return response
def has_key(keys: list, search: str, value: object) -> bool:
    """Tell whether any dict in *keys* maps *search* to *value*.

    The entries are checked in order and the scan stops at the first match,
    so entries behind a match are not validated.

    Args:
        keys: List of dicts.
        search: Key looked up in every dict.
        value: Value to compare with.

    Returns:
        True when a dict with entry[search] == value exists.

    Raises:
        ValueError: If *keys* is not a list, *search* is not a non-empty
            string or an inspected entry is not a dict.
        KeyError: If an inspected entry lacks the key *search*.
    """
    if not isinstance(keys, list):
        raise ValueError(f"Parameter keys[]='{type(keys)}' is not 'list'")
    elif not isinstance(search, str):
        raise ValueError(f"Parameter search[]='{type(search)}' is not 'str'")
    elif search == "":
        raise ValueError("Parameter 'search' is empty")

    for key in keys:
        if not isinstance(key, dict):
            raise ValueError(f"key[]='{type(key)}' is not 'dict'")
        elif search not in key:
            raise KeyError(f"Cannot find search='{search}'")
        elif key[search] == value:
            return True

    return False
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract blocked domains and reasons from the rows of an HTML table.

    Args:
        tag: Table element; the first cell of a row holds the domain, the
            second one the reason.

    Returns:
        List of {"domain": ..., "reason": ...} dicts. Blacklisted and
        invalid domains are skipped.

    Raises:
        ValueError: If *tag* is not a bs4 Tag.
        KeyError: If *tag* contains no table rows.
    """
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")

    rows = tag.select("tr")
    if len(rows) == 0:
        raise KeyError("No table rows found in table!")

    domains = []
    for element in rows:
        cells = element.find_all("td")

        # Skipping element, no <td> found
        if not cells:
            continue

        domain = tidyup_domain(cells[0].text)
        # Rows with a single cell simply have no reason
        reason = tidyup_reason(cells[1].text) if len(cells) > 1 else ""

        if blacklist.is_blacklisted(domain):
            print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
            continue
        elif domain == "gab.com/.ai, develop.gab.com":
            # Multiple domains in one row, registered one by one
            # NOTE(review): the first two names were rebuilt from the
            # combined value - confirm against the repository
            for single in ("gab.com", "gab.ai", "develop.gab.com"):
                domains.append({
                    "domain": single,
                    "reason": reason,
                })
            continue
        elif not validators.domain(domain):
            print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
            continue

        domains.append({
            "domain": domain,
            "reason": reason,
        })

    return domains
def fetch_url(url: str, headers: dict, timeout: list) -> requests.models.Response:
    """Fetch an absolute URL through get_response().

    Args:
        url: Absolute URL. NOTE: get_response() always uses https, so the
            scheme of *url* is not honoured.
        headers: HTTP headers to send.
        timeout: Timeout value(s) handed on to get_response().

    Returns:
        The response object.

    Raises:
        ValueError: If *url* is not a non-empty string or has no host name.
    """
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")

    components = urlparse(url)

    if not components.hostname:
        raise ValueError(f"Parameter url='{url}' does not contain a host name")

    host = components.hostname
    if ":" in host:
        # IPv6 literals need their brackets back
        host = f"[{host}]"

    # Keep an explicit port, 'hostname' alone would silently drop it
    if components.port is not None:
        host = f"{host}:{components.port}"

    # A URL without path still needs "/" for the request
    path = components.path or "/"

    # Avoid a trailing "?" when there is no query
    if components.query != "":
        path = f"{path}?{components.query}"

    return get_response(host, path, headers, timeout)