return key in cache
def set_all_cache_key(key: str, rows: list, value: any):
+ """Store 'value' in cache[key] under the first element of every tuple in 'rows'.
+
+ cache[key] is created as an empty dict first when is_cache_initialized(key)
+ reports that it does not exist yet. Elements of 'rows' that are not tuples
+ are not cached; a warning naming their type is printed instead.
+ """
- # DEBUG: print(f"DEBUG: key='{key}',rows()={len(rows)},value[]={type(value)} - CALLED!")
+ # NOISY-DEBUG: print(f"DEBUG: key='{key}',rows()={len(rows)},value[]={type(value)} - CALLED!")
if not is_cache_initialized(key):
- # DEBUG: print(f"DEBUG: Cache for key='{key}' not initialized.")
+ # NOISY-DEBUG: print(f"DEBUG: Cache for key='{key}' not initialized.")
cache[key] = {}
for sub in rows:
- # DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}'")
+ # NOISY-DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}'")
if isinstance(sub, tuple):
cache[key][sub[0]] = value
else:
- print(f"WARNING: Unsupported type row[]='{type(row)}'")
+ # The loop variable is 'sub'; 'row' was never bound, so this branch raised NameError.
+ print(f"WARNING: Unsupported type row[]='{type(sub)}'")
- # DEBUG: print("DEBUG: EXIT!")
+ # NOISY-DEBUG: print("DEBUG: EXIT!")
def set_cache_key(key: str, sub: str, value: any):
if not is_cache_initialized(key):
# DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
return software
+def strip_hosted_on(software: str) -> str:
+    """Return the part of 'software' that precedes the text "hosted on".
+
+    The leading part is whitespace-stripped and then handed to
+    strip_until(software, " - "), whose result is returned.
+
+    When "hosted on" does not occur in 'software', a warning is printed and
+    'software' is returned unchanged.
+
+    Raises:
+        Exception: if 'software' is an empty string.
+    """
+    # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
+    if software == "":
+        print("ERROR: Bad method call, 'software' is empty")
+        raise Exception("Parameter 'software' is empty")
+    elif "hosted on" not in software:
+        print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
+        return software
+
+    # Search for exactly the text the guard above tested, so find() cannot
+    # return -1 here (a -1 would silently chop the last character).
+    end = software.find("hosted on")
+    # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
+
+    # Keep everything before the marker; strip() drops the separating blank.
+    software = software[:end].strip()
+    # DEBUG: print(f"DEBUG: software='{software}'")
+
+    software = strip_until(software, " - ")
+
+    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+    return software
+
def strip_until(software: str, until: str) -> str:
# DEBUG: print(f"DEBUG: software='{software}',until='{until}' - CALLED!")
if software == "":
print(f"ERROR: Bad method call, 'until' is empty")
raise Exception("Parameter 'until' is empty")
elif not until in software:
- print(f"WARNING: Cannot find 'powered by' in '{software}'!")
+ print(f"WARNING: Cannot find '{until}' in '{software}'!")
return software
# Next, strip until part
])
# Cleanup old entries
- # DEBUG: print(f"DEBUG: Purging old records (distance: {config['error_log_cleanup'])")
+ # DEBUG: print(f"DEBUG: Purging old records (distance: {config['error_log_cleanup']})")
cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config["error_log_cleanup"]])
except BaseException as e:
print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
# DEBUG: print("DEBUG: EXIT!")
def update_last_instance_fetch(domain: str):
- #print("DEBUG: Updating last_instance_fetch for domain:", domain)
+ # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain)
try:
cursor.execute("UPDATE instances SET last_instance_fetch = ?, last_updated = ? WHERE domain = ? LIMIT 1", [
time.time(),
sys.exit(255)
connection.commit()
- #print("DEBUG: EXIT!")
+ # DEBUG: print("DEBUG: EXIT!")
def update_last_nodeinfo(domain: str):
# DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain)
# DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'")
peers.append(row["host"])
- #print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
+ # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
update_last_instance_fetch(domain)
# DEBUG: print("DEBUG: Returning peers[]:", type(peers))
except BaseException as e:
print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
- #print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
+ # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
update_last_instance_fetch(domain)
# DEBUG: print("DEBUG: Returning peers[]:", type(peers))
except BaseException as e:
print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(e)}]:'{str(e)}'")
- #print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
+ # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
update_last_instance_fetch(domain)
# DEBUG: print("DEBUG: Returning peers[]:", type(peers))
print("WARNING: Could not reach any JSON API:", domain)
update_last_error(domain, res)
elif res.ok and isinstance(data, list):
- print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
+ # DEBUG: print(f"DEBUG: domain='{domain}' returned a list: '{data}'")
sys.exit(255)
elif "federated_instances" in data:
# DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
print("WARNING: Some error during get():", domain, e)
update_last_error(domain, e)
- #print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
+ # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
update_last_instance_fetch(domain)
# DEBUG: print("DEBUG: Returning peers[]:", type(peers))
# DEBUG: print("DEBUG: Fetching nodeinfo from domain,path:", domain, path)
nodeinfo = fetch_wellknown_nodeinfo(domain)
- # DEBUG: print("DEBUG: nodeinfo:", len(nodeinfo))
+ # DEBUG: print("DEBUG: nodeinfo:", nodeinfo)
if len(nodeinfo) > 0:
# DEBUG: print("DEBUG: Returning auto-discovered nodeinfo:", len(nodeinfo))
data = {}
for request in requests:
if path != None and path != "" and request != path:
- print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
+ # DEBUG: print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
continue
try:
doc = bs4.BeautifulSoup(res.text, "html.parser")
# DEBUG: print("DEBUG: doc[]:", type(doc))
- tag = doc.find("meta", {"name": "generator"})
+ generator = doc.find("meta", {"name": "generator"})
+ site_name = doc.find("meta", {"property": "og:site_name"})
- # DEBUG: print(f"DEBUG: tag[{type(tag)}: {tag}")
- if isinstance(tag, bs4.element.Tag):
- # DEBUG: print("DEBUG: Found generator meta tag: ", domain)
- software = tidyup(tag.get("content"))
+ # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
+ if isinstance(generator, bs4.element.Tag):
+ # DEBUG: print("DEBUG: Found generator meta tag:", domain)
+ software = tidyup(generator.get("content"))
print(f"INFO: domain='{domain}' is generated by '{software}'")
nodeinfos["detection_mode"][domain] = "GENERATOR"
remove_pending_error(domain)
+ elif isinstance(site_name, bs4.element.Tag):
+ # DEBUG: print("DEBUG: Found property=og:site_name:", domain)
+ sofware = tidyup(site_name.get("content"))
+ print(f"INFO: domain='{domain}' has og:site_name='{software}'")
+ nodeinfos["detection_mode"][domain] = "SITE_NAME"
+ remove_pending_error(domain)
except BaseException as e:
# DEBUG: print(f"DEBUG: Cannot fetch / from '{domain}':", e)
software = remove_version(software)
# DEBUG: print(f"DEBUG: software[]={type(software)}")
- if type(software) is str and "powered by" in software:
+ if type(software) is str and " powered by " in software:
# DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
software = remove_version(strip_powered_by(software))
+ elif type(software) is str and " hosted on " in software:
+ # DEBUG: print(f"DEBUG: software='{software}' has 'hosted on' in it")
+ software = remove_version(strip_hosted_on(software))
elif type(software) is str and " by " in software:
# DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
software = strip_until(software, " by ")
print("WARNING: JSON response is an error:", data["message"])
update_last_error(domain, data["message"])
return fetch_generator_from_path(domain)
+ elif "message" in data:
+ print("WARNING: JSON response contains only a message:", data["message"])
+ update_last_error(domain, data["message"])
+ return fetch_generator_from_path(domain)
elif "software" not in data or "name" not in data["software"]:
- # DEBUG: print(f"DEBUG: JSON response from {domain} does not include [software][name], fetching / ...")
+ # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
software = fetch_generator_from_path(domain)
# DEBUG: print(f"DEBUG: Generator for domain='{domain}' is: {software}, EXIT!")
# DEBUG: print("DEBUG: EXIT!")
def is_instance_registered(domain: str) -> bool:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ # NOISY-DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
if not is_cache_initialized("is_registered"):
- # DEBUG: print(f"DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...")
+ # NOISY-DEBUG: print(f"DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...")
try:
cursor.execute("SELECT domain FROM instances")
# Is cache found?
registered = is_cache_key_set("is_registered", domain)
- # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!")
+ # NOISY-DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!")
return registered
def add_instance(domain: str, origin: str, originator: str, path: str = None):
software = determine_software(domain, path)
# DEBUG: print("DEBUG: Determined software:", software)
- print(f"INFO: Adding instance {domain} (origin: {origin})")
+ print(f"INFO: Adding instance domain='{domain}' (origin='{origin}',software='{software}')")
try:
cursor.execute(
"INSERT INTO instances (domain, origin, originator, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
offset = 0
break
- #print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
+ # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
update_last_instance_fetch(domain)
# DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"]))