return blacklisted
def remove_pending_error(domain: str):
    """Forget any pending error recorded for ``domain``.

    Deletes the ``domain`` entry from the module-level ``pending_errors``
    mapping. A domain without a pending error is left alone silently, so the
    function can be called unconditionally.

    Args:
        domain: Key to remove from ``pending_errors``.
    """
    try:
        # Prevent updating any pending errors, nodeinfo was found
        del pending_errors[domain]
    except KeyError:
        # Nothing was pending for this domain. Only the missing-key case is
        # ignored so that interrupts and real bugs still surface.
        pass

def get_hash(domain: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``domain`` encoded as UTF-8."""
    return hashlib.sha256(domain.encode("utf-8")).hexdigest()
try:
res = reqto.get(f"https://{domain}{get_peers_url}", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+ # NOISY-DEBUG: print("DEBUG: res.ok,res.json[]:", res.ok, type(res.json()))
if not res.ok or res.status_code >= 400:
res = reqto.get(f"https://{domain}/api/v3/site", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
print("WARNING: Could not reach any JSON API:", domain)
update_last_error(domain, res)
else:
- # NOISY-DEBUG: print("DEBUG: Querying API was successful:", domain, len(res.json()))
+ # NOISY-DEBUG: print("DEBUG:Querying API was successful:", domain, len(res.json()))
peers = res.json()
nodeinfos["get_peers_url"][domain] = get_peers_url
try:
res = reqto.post(f"https://{domain}{path}", data=data, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+ # NOISY-DEBUG: print("DEBUG: res.ok,res.json[]:", res.ok, type(res.json()))
if not res.ok or res.status_code >= 400:
print("WARNING: Cannot query JSON API:", domain, path, data, res.status_code)
update_last_error(domain, res)
# NOISY-DEBUG: print("DEBUG: Fetching nodeinfo from domain:", domain)
nodeinfo = fetch_wellknown_nodeinfo(domain)
- # NOISY-DEBUG: print("DEBUG: nodeinfo:", len(nodeinfo))
+ # NOISY-DEBUG: print("DEBUG:nodeinfo:", len(nodeinfo))
if len(nodeinfo) > 0:
# NOISY-DEBUG: print("DEBUG: Returning auto-discovered nodeinfo:", len(nodeinfo))
update_last_error(domain, e)
pass
- # NOISY-DEBUG: print("DEBUG: json[]:", type(json))
- if not isinstance(json, dict) or len(json) == 0:
- print("WARNING: Failed fetching nodeinfo from domain:", domain)
-
# NOISY-DEBUG: print("DEBUG: Returning json[]:", type(json))
return json
# NOISY-DEBUG: print("DEBUG: Returning json[]:", type(json))
return json
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Detect the software name from a page's ``<meta name="generator">`` tag.

    Requests ``https://{domain}{path}`` and, when the response is successful
    (status below 300) and non-empty, parses it as HTML and looks for a
    ``<meta name="generator">`` element. When one is found, its ``content``
    attribute is passed through ``tidyup()`` and any pending error for the
    domain is dropped via ``remove_pending_error()``.

    Args:
        domain: Host name to query.
        path: Path on that host to fetch; defaults to the start page.

    Returns:
        The tidied generator string, or ``None`` when the request failed, the
        response was unusable or no generator tag was present (note that the
        ``-> str`` annotation does not reflect the ``None`` case).
    """
    # NOISY-DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!")
    software = None

    try:
        # NOISY-DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' ...")
        res = reqto.get(f"https://{domain}{path}", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))

        # NOISY-DEBUG: print("DEBUG: domain,res.ok,res.status_code:", domain, res.ok, res.status_code)
        if res.ok and res.status_code < 300 and len(res.text) > 0:
            # NOISY-DEBUG: print("DEBUG: Search for <meta name='generator'>:", domain)
            doc = bs4.BeautifulSoup(res.text, "html.parser")

            # NOISY-DEBUG: print("DEBUG: doc[]:", type(doc))
            tag = doc.find("meta", {"name": "generator"})

            # NOISY-DEBUG: print(f"DEBUG: tag[{type(tag)}: {tag}")
            if isinstance(tag, bs4.element.Tag):
                # NOISY-DEBUG: print("DEBUG: Found generator meta tag: ", domain)
                software = tidyup(tag.get("content"))
                # NOISY-DEBUG: print(f"DEBUG: software='{software}'")
                remove_pending_error(domain)

    except Exception as e:
        # Exception rather than BaseException: a failed fetch or parse is
        # recorded and swallowed, but KeyboardInterrupt/SystemExit propagate.
        # The message reports the path actually requested; for the default
        # path the output is unchanged.
        print(f"WARNING: Cannot fetch {path} from '{domain}':", e)
        update_last_error(domain, e)

    # NOISY-DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software

def determine_software(domain: str) -> str:
# NOISY-DEBUG: print("DEBUG: Determining software for domain:", domain)
software = None
+ # NOISY-DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
json = fetch_nodeinfo(domain)
- # NOISY-DEBUG: print("DEBUG: json[]:", type(json))
+ # NOISY-DEBUG: print("DEBUG: json[]:", type(json))
if not isinstance(json, dict) or len(json) == 0:
# NOISY-DEBUG: print("DEBUG: Could not determine software type:", domain)
- return None
+ return fetch_generator_from_path(domain)
# NOISY-DEBUG: print("DEBUG: json():", len(json), json)
if "status" in json and json["status"] == "error" and "message" in json:
print("WARNING: JSON response is an error:", json["message"])
update_last_error(domain, json["message"])
- return None
+ return fetch_generator_from_path(domain)
elif "software" not in json or "name" not in json["software"]:
# NOISY-DEBUG: print(f"DEBUG: JSON response from {domain} does not include [software][name], fetching / ...")
- try:
- res = reqto.get(f"https://{domain}/", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
-
- # NOISY-DEBUG: print("DEBUG: domain,res.ok,res.status_code:", domain, res.ok, res.status_code)
- if res.ok and res.status_code < 300 and len(res.text) > 0:
- # NOISY-DEBUG: print("DEBUG: Search for <meta name='generator'>:", domain)
-
- doc = bs4.BeautifulSoup(res.text, "html.parser")
- # NOISY-DEBUG: print("DEBUG: doc[]:", type(doc))
-
- tag = doc.find("meta", {"name": "generator"})
- # NOISY-DEBUG: print(f"DEBUG: tag[{type(tag)}: {tag}")
- if isinstance(tag, bs4.element.Tag):
- # NOISY-DEBUG: print("DEBUG: Found generator meta tag:", domain)
- software = tidyup(tag.get("content"))
-
- except BaseException as e:
- print(f"WARNING: Cannot fetch / from '{domain}':", e)
- update_last_error(domain, e)
- pass
+ software = fetch_generator_from_path(domain)
# NOISY-DEBUG: print(f"DEBUG: Generator for domain='{domain}' is: {software}, EXIT!")
return software
software = tidyup(json["software"]["name"])
- # NOISY-DEBUG: print("DEBUG: tidyup software:", software)
+ # NOISY-DEBUG: print("DEBUG: sofware after tidyup():", software)
if software in ["akkoma", "rebased"]:
# NOISY-DEBUG: print("DEBUG: Setting pleroma:", domain, software)
software = "pleroma"
# NOISY-DEBUG: print("DEBUG: Setting misskey:", domain, software)
software = "misskey"
elif software.find("/") > 0:
- print("WARNING: Spliting of path:", software)
+ print("WARNING: Spliting of slash:", software)
software = software.split("/")[-1];
elif software.find("|") > 0:
- print("WARNING: Spliting of path:", software)
- software = software.split("|")[0].strip();
+ print("WARNING: Spliting of pipe:", software)
+ software = tidyup(software.split("|")[0]);
+ # NOISY-DEBUG: print(f"DEBUG: software[]={type(software)}")
if software == "":
print("WARNING: tidyup() left no software name behind:", domain)
software = None
+ # NOISY-DEBUG: print(f"DEBUG: software[]={type(software)}")
+ if str(software) == "":
+ # NOISY-DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...")
+ software = fetch_generator_from_path(domain)
+
# NOISY-DEBUG: print("DEBUG: Returning domain,software:", domain, software)
return software
),
)
+ # NOISY-DEBUG: print(f"DEBUG: cursor.rowcount={cursor.rowcount}")
if cursor.rowcount == 0:
print("WARNING: Did not update any rows:", domain)
print("ERROR: failed SQL query:", reason, blocker, blocked, block_level, e)
sys.exit(255)
+ # NOISY-DEBUG: print("DEBUG: EXIT!")
+
def update_last_seen(blocker: str, blocked: str, block_level: str):
# NOISY-DEBUG: print("DEBUG: Updating last_seen for:", blocker, blocked, block_level)
try:
if domain in nodeinfos["nodeinfo_url"]:
# NOISY-DEBUG # NOISY-DEBUG: print("DEBUG: domain has pending nodeinfo being updated:", domain)
update_nodeinfos(domain)
- try:
- # Prevent updating any pending errors, nodeinfo was found
- del pending_errors[domain]
- except:
- pass
+ remove_pending_error(domain)
elif domain in pending_errors:
# NOISY-DEBUG: print("DEBUG: domain has pending error being updated:", domain)
update_last_error(domain, pending_errors[domain])
- del pending_errors[domain]
+ remove_pending_error(domain)
except BaseException as e:
print("ERROR: failed SQL query:", domain, e)
# Prevents exceptions:
if blocklist is None:
- # NOISY-DEBUG: print("DEBUG: Instance has no block list:", domain)
+ # NOISY-DEBUG: print("DEBUG:Instance has no block list:", domain)
return {}
for line in blocklist.find("table").find_all("tr")[1:]: