X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fba%2Fmodels%2Finstances.py;h=8a4103ef518d45bf01221270541c6c76a507a5bc;hb=8d598a643e074bba4b83ff42a74a7c8f42c5b22d;hp=4ff0c188a80de48b628d38c309b5508403e55323;hpb=3886e93412b151e0cb67049632515f3098f8001b;p=fba.git diff --git a/fba/models/instances.py b/fba/models/instances.py index 4ff0c18..8a4103e 100644 --- a/fba/models/instances.py +++ b/fba/models/instances.py @@ -37,10 +37,11 @@ from fba.models import error_log logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +#logger.setLevel(logging.DEBUG) # Found info from node, such as nodeinfo URL, detection mode that needs to be # written to database. Both arrays must be filled at the same time or else -# update_data() will fail +# update() will fail _pending = { # Detection mode # NULL means all detection methods have failed (maybe still reachable instance) @@ -49,6 +50,10 @@ _pending = { "nodeinfo_url" : {}, # Found total peers "total_peers" : {}, + # Found total blocks + "total_blocks" : {}, + # Obfuscated domains + "obfuscated_blocks" : {}, # Last fetched instances "last_instance_fetch": {}, # Last updated @@ -57,23 +62,32 @@ _pending = { "last_blocked" : {}, # Last nodeinfo (fetched) "last_nodeinfo" : {}, + # Last response time + "last_response_time" : {}, # Last status code "last_status_code" : {}, # Last error details "last_error_details" : {}, # Wether obfuscation has been used "has_obfuscation" : {}, + # Original software + "original_software" : {}, + # Aliased software + "software" : {}, } def _set_data(key: str, domain: str, value: any): logger.debug("key='%s',domain='%s',value[]='%s' - CALLED!", key, domain, type(value)) domain_helper.raise_on(domain) + if not isinstance(key, str): - raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'") + raise ValueError(f"Parameter key[]='{type(key)}' is not of type 'str'") elif key == "": raise ValueError("Parameter 'key' is empty") elif not key in _pending: raise ValueError(f"key='{key}' not found in _pending") + elif blacklist.is_blacklisted(domain): + raise Exception(f"domain='{domain}' is blacklisted but function has been invoked") elif not utils.is_primitive(value): raise ValueError(f"value[]='{type(value)}' is not a primitive type") @@ -86,27 +100,38 @@ def has_pending(domain: str) -> bool: logger.debug("domain='%s' - CALLED!", domain) domain_helper.raise_on(domain) + if not is_registered(domain): + raise ValueError(f"domain='{domain}' is not registered but function was invoked.") + elif blacklist.is_blacklisted(domain): + raise Exception(f"domain='{domain}' is blacklisted but function has been invoked") + has = False + logger.debug("Checking %d _pending array elements ...", len(_pending)) for key in _pending: - logger.debug("key='%s',domain='%s',_pending[key]()=%d", key, domain, len(_pending[key])) + logger.debug("domain='%s',_pending[%s]()=%d", domain, key, len(_pending[key])) if domain in _pending[key]: + logger.debug("domain='%s' at key='%s' has pending data ...", domain, key) has = True break logger.debug("has='%s' - EXIT!", has) return has -def update_data(domain: str): +def update(domain: str): logger.debug("domain='%s' - CALLED!", domain) domain_helper.raise_on(domain) - if not has_pending(domain): - raise Exception(f"domain='{domain}' has no pending instance data, but function invoked") - elif not is_registered(domain): + + if not is_registered(domain): raise Exception(f"domain='{domain}' cannot be updated while not being registered") + elif not has_pending(domain): + raise Exception(f"domain='{domain}' has no pending instance data, but function invoked") + elif blacklist.is_blacklisted(domain): + raise Exception(f"domain='{domain}' is blacklisted but function has been invoked") - logger.debug("Updating instance data for domain='%s' ...", domain) sql_string = "" fields = list() + + logger.debug("Checking %d _pending array elements ...", len(_pending)) for key in _pending: logger.debug("Checking key='%s',domain='%s'", key, domain) if domain in _pending[key]: @@ -114,14 +139,15 @@ def update_data(domain: str): fields.append(_pending[key][domain]) sql_string += f" {key} = ?," - logger.debug("sql_string()=%d", len(sql_string)) + logger.debug("sql_string(%d)='%s'", len(sql_string), sql_string) if sql_string == "": - raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'") + raise ValueError(f"No fields have been set, but function invoked, domain='{domain}'") # Set last_updated to current timestamp fields.append(time.time()) # For WHERE statement + logger.debug("Setting domain='%s' for WHERE statement ...", domain) fields.append(domain) logger.debug("sql_string='%s',fields()=%d", sql_string, len(fields)) @@ -151,25 +177,27 @@ def add(domain: str, origin: str, command: str, path: str = None, software: str domain_helper.raise_on(domain) if not isinstance(origin, str) and origin is not None: - raise ValueError(f"origin[]='{type(origin)}' is not 'str'") + raise ValueError(f"origin[]='{type(origin)}' is not of type 'str'") elif origin == "": raise ValueError("Parameter 'origin' is empty") elif not isinstance(command, str): - raise ValueError(f"command[]='{type(command)}' is not 'str'") + raise ValueError(f"command[]='{type(command)}' is not of type 'str'") elif command == "": raise ValueError("Parameter 'command' is empty") elif not isinstance(path, str) and path is not None: - raise ValueError(f"path[]='{type(path)}' is not 'str'") + raise ValueError(f"path[]='{type(path)}' is not of type 'str'") elif path == "": raise ValueError("Parameter 'path' is empty") + elif path is not None and not path.startswith("/"): + raise ValueError(f"path='{path}' does not start with / but should") elif not isinstance(software, str) and software is not None: - raise ValueError(f"software[]='{type(software)}' is not 'str'") + raise ValueError(f"software[]='{type(software)}' is not of type 'str'") elif software == "": raise ValueError("Parameter 'software' is empty") elif origin is not None and not validators.domain(origin.split("/")[0]): raise ValueError(f"Bad origin name='{origin}'") elif blacklist.is_blacklisted(domain): - raise Exception(f"domain='{domain}' is blacklisted, but method invoked") + raise Exception(f"domain='{domain}' is blacklisted, but function invoked") elif domain.find("/profile/") > 0 or domain.find("/users/") > 0 or (is_registered(domain.split("/")[0]) and domain.find("/c/") > 0): raise Exception(f"domain='{domain}' is a single user") elif domain.find("/tag/") > 0: @@ -186,19 +214,22 @@ def add(domain: str, origin: str, command: str, path: str = None, software: str logger.debug("Determined software='%s'", software) if software == "lemmy" and domain.find("/c/") > 0: domain = domain.split("/c/")[0] + + logger.debug("domain='%s' - LEMMY /c/ !", domain) if is_registered(domain): logger.warning("domain='%s' already registered after cutting off user part. - EXIT!", domain) return - logger.info("Adding instance domain='%s',origin='%s',software='%s',command='%s'", domain, origin, software, command) + logger.info("Adding instance domain='%s',origin='%s',software='%s',command='%s' ...", domain, origin, software, command) database.cursor.execute( - "INSERT INTO instances (domain, origin, command, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)", + "INSERT INTO instances (domain, origin, command, hash, software, original_software, first_seen) VALUES (?, ?, ?, ?, ?, ?, ?)", ( domain, origin, command, utils.get_hash(domain), software, + software, time.time() ), ) @@ -209,7 +240,7 @@ def add(domain: str, origin: str, command: str, path: str = None, software: str logger.debug("Checking if domain='%s' has pending updates ...", domain) if has_pending(domain): logger.debug("Flushing updates for domain='%s' ...", domain) - update_data(domain) + update(domain) logger.debug("EXIT!") @@ -245,6 +276,14 @@ def set_last_error(domain: str, error: dict): logger.debug("Setting last_error_details='%s' (error_message)", error['error_message']) _set_data("last_status_code" , domain, error["status_code"]) _set_data("last_error_details", domain, error["error_message"] if error["error_message"] != "" else None) + elif "json" in error and "error" in error["json"] and "msg" in error["json"]: + logger.debug("Setting last_error_details='%s' (json,error)", error["json"]["msg"]) + _set_data("last_status_code" , domain, error["status_code"]) + _set_data("last_error_details", domain, error["json"]["msg"] if error["json"]["msg"] != "" else None) + elif "json" in error and "error" in error["json"] and "message" in error["json"]["error"]: + logger.debug("Setting last_error_details='%s' (json,error)", error["json"]["error"]["message"]) + _set_data("last_status_code" , domain, error["status_code"]) + _set_data("last_error_details", domain, error["json"]["error"]["message"] if error["json"]["error"]["message"] != "" else None) elif "json" in error and "error" in error["json"]: logger.debug("Setting last_error_details='%s' (json,error)", error["json"]["error"]) _set_data("last_status_code" , domain, error["status_code"]) @@ -265,10 +304,16 @@ def set_success(domain: str): logger.debug("EXIT!") -def is_registered(domain: str) -> bool: - logger.debug("domain='%s' - CALLED!", domain) +def is_registered(domain: str, skip_raise = False) -> bool: + logger.debug("domain='%s',skip_raise='%s' - CALLED!", domain, skip_raise) domain_helper.raise_on(domain) + if not isinstance(skip_raise, bool): + raise ValueError(f"skip_raise[]='{type(skip_raise)}' is not type of 'bool'") + + if not skip_raise: + domain_helper.raise_on(domain) + logger.debug("domain='%s' - CALLED!", domain) if not cache.key_exists("is_registered"): logger.debug("Cache for 'is_registered' not initialized, fetching all rows ...") @@ -288,21 +333,29 @@ def is_recent(domain: str, column: str = "last_instance_fetch") -> bool: domain_helper.raise_on(domain) if not isinstance(column, str): - raise ValueError(f"Parameter column[]='{type(column)}' is not 'str'") - elif column not in ["last_instance_fetch", "last_blocked"]: + raise ValueError(f"Parameter column[]='{type(column)}' is not of type 'str'") + elif not column.startswith("last_"): raise ValueError(f"Parameter column='{column}' is not expected") elif not is_registered(domain): logger.debug("domain='%s' is not registered, returning False - EXIT!", domain) return False + key = "recheck_instance" + if column == "last_blocked": + key = "recheck_block" + # Query database database.cursor.execute(f"SELECT {column} FROM instances WHERE domain = ? LIMIT 1", [domain]) # Fetch row - fetched = database.cursor.fetchone()[0] + row = database.cursor.fetchone() + + fetched = float(row[column]) if row[column] is not None else 0.0 + + diff = (time.time() - fetched) - logger.debug("fetched[%s]='%s'", type(fetched), fetched) - recently = isinstance(fetched, float) and time.time() - fetched <= config.get("recheck_instance") + logger.debug("fetched[%s]='%s',key='%s',diff=%f", type(fetched), fetched, key, diff) + recently = bool(diff < config.get(key)) logger.debug("recently='%s' - EXIT!", recently) return recently @@ -310,26 +363,16 @@ def is_recent(domain: str, column: str = "last_instance_fetch") -> bool: def deobfuscate(char: str, domain: str, blocked_hash: str = None) -> tuple: logger.debug("char='%s',domain='%s',blocked_hash='%s' - CALLED!", char, domain, blocked_hash) - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - elif domain.lower() != domain: - raise ValueError(f"Parameter domain='{domain}' must be all lower-case") - elif domain.endswith(".arpa"): - raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!") - elif domain.endswith(".onion"): - raise ValueError(f"domain='{domain}' is a TOR domain, please don't crawl them!") - elif domain.endswith(".tld"): - raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - elif not isinstance(char, str): - raise ValueError(f"Parameter char[]='{type(char)}' is not 'str'") + if not isinstance(char, str): + raise ValueError(f"Parameter char[]='{type(char)}' is not of type 'str'") elif char == "": raise ValueError("Parameter 'char' is empty") elif not char in domain: raise ValueError(f"char='{char}' not found in domain='{domain}' but function invoked") + elif not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}'") elif not isinstance(blocked_hash, str) and blocked_hash is not None: - raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not 'str'") + raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not of type 'str'") # Init row row = None @@ -353,7 +396,7 @@ def deobfuscate(char: str, domain: str, blocked_hash: str = None) -> tuple: logger.debug("domain='%s' - AFTER!", domain) if domain == "": - debug.warning("domain is empty after tidyup - EXIT!") + logger.warning("domain is empty after tidyup - EXIT!") return None search = domain.replace(char, "_") @@ -385,25 +428,64 @@ def set_last_instance_fetch(domain: str): _set_data("last_instance_fetch", domain, time.time()) logger.debug("EXIT!") +def set_last_response_time(domain: str, response_time: float): + logger.debug("domain='%s',response_time=%d - CALLED!", domain, response_time) + domain_helper.raise_on(domain) + + if not isinstance(response_time, float): + raise ValueError(f"response_time[]='{type(response_time)}' is not of type 'float'") + elif response_time < 0: + raise ValueError(f"response_time={response_time} is below zero") + + # Set timestamp + _set_data("last_response_time", domain, response_time) + logger.debug("EXIT!") + def set_total_peers(domain: str, peers: list): logger.debug("domain='%s',peers()=%d - CALLED!", domain, len(peers)) domain_helper.raise_on(domain) if not isinstance(peers, list): - raise ValueError(f"Parameter peers[]='{type(peers)}' is not 'list'") + raise ValueError(f"Parameter peers[]='{type(peers)}' is not of type 'list'") # Set timestamp _set_data("total_peers", domain, len(peers)) logger.debug("EXIT!") +def set_total_blocks(domain: str, blocks: list): + logger.debug("domain='%s',blocks()=%d - CALLED!", domain, len(blocks)) + domain_helper.raise_on(domain) + + if not isinstance(blocks, list): + raise ValueError(f"Parameter blocks[]='{type(blocks)}' is not of type 'list'") + + # Set timestamp + _set_data("total_blocks", domain, len(blocks)) + logger.debug("EXIT!") + +def set_obfuscated_blocks(domain: str, obfuscated: int): + logger.debug("domain='%s',obfuscated=%d - CALLED!", domain, obfuscated) + domain_helper.raise_on(domain) + + if not isinstance(obfuscated, int): + raise ValueError(f"Parameter obfuscated[]='{type(obfuscated)}' is not of type 'int'") + elif obfuscated < 0: + raise ValueError(f"Parameter obfuscated={obfuscated} is not valid") + + # Set timestamp + _set_data("obfuscated_blocks", domain, obfuscated) + logger.debug("EXIT!") + def set_nodeinfo_url(domain: str, url: str): logger.debug("domain='%s',url='%s' - CALLED!", domain, url) domain_helper.raise_on(domain) if not isinstance(url, str) and url is not None: - raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'") + raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'") elif url == "": raise ValueError("Parameter 'url' is empty") + elif url is not None and not validators.url(url): + raise ValueError(f"Parameter url='{url}' is not a valid URL") # Set timestamp _set_data("nodeinfo_url", domain, url) @@ -414,7 +496,7 @@ def set_detection_mode(domain: str, mode: str): domain_helper.raise_on(domain) if not isinstance(mode, str) and mode is not None: - raise ValueError(f"Parameter mode[]='{type(mode)}' is not 'str'") + raise ValueError(f"Parameter mode[]='{type(mode)}' is not of type 'str'") elif mode == "": raise ValueError("Parameter 'mode' is empty") @@ -423,12 +505,95 @@ def set_detection_mode(domain: str, mode: str): logger.debug("EXIT!") def set_has_obfuscation(domain: str, status: bool): - logger.debug("domain(%d)='%s',status='%s' - CALLED!", len(domain), domain, status) + logger.debug("domain='%s',status='%s' - CALLED!", domain, status) domain_helper.raise_on(domain) if not isinstance(status, bool): - raise ValueError(f"Parameter status[]='{type(status)}' is not 'bool'") + raise ValueError(f"Parameter status[]='{type(status)}' is not of type 'bool'") # Set timestamp _set_data("has_obfuscation", domain, status) logger.debug("EXIT!") + +def set_original_software(domain: str, software: str): + logger.debug("domain='%s',software='%s' - CALLED!", domain, software) + domain_helper.raise_on(domain) + + if not isinstance(software, str) and software is not None: + raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'") + elif software == "": + raise ValueError("Parameter 'software' is empty") + + # Set original software + _set_data("original_software", domain, software) + logger.debug("EXIT!") + + +def set_software(domain: str, software: str): + logger.debug("domain='%s',software='%s' - CALLED!", domain, software) + domain_helper.raise_on(domain) + + if not isinstance(software, str) and software is not None: + raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'") + elif software == "": + raise ValueError("Parameter 'software' is empty") + + # Set software (maybe aliased to generic name) + _set_data("software", domain, software) + logger.debug("EXIT!") + +def valid(value: str, column: str) -> bool: + logger.debug("value='%s' - CALLED!", value) + if not isinstance(value, str): + raise ValueError(f"Parameter value[]='{type(value)}' is not of type 'str'") + elif value == "": + raise ValueError("Parameter 'value' is empty") + elif not isinstance(column, str): + raise ValueError(f"Parameter column[]='{type(column)}' is not of type 'str'") + elif column == "": + raise ValueError("Parameter 'column' is empty") + + # Query database + database.cursor.execute( + f"SELECT {column} FROM instances WHERE {column} = ? LIMIT 1", [value] + ) + + is_valid = database.cursor.fetchone() is not None + + logger.debug("is_valid='%s' - EXIT!", is_valid) + return is_valid + +def translate_idnas(rows: list, column: str): + logger.debug("rows[]='%s' - CALLED!", type(rows)) + + if not isinstance(rows, list): + raise ValueError("rows[]='{type(rows)}' is not of type 'list'") + elif len(rows) == 0: + raise ValueError("Parameter 'rows' is an empty list") + elif not isinstance(column, str): + raise ValueError(f"column='{type(column)}' is not of type 'str'") + elif column == "": + raise ValueError("Parameter 'column' is empty") + elif column not in ["domain", "origin"]: + raise ValueError(f"column='{column}' is not supported") + + logger.info("Checking/converting %d domain names ...", len(rows)) + for row in rows: + logger.debug("row[]='%s'", type(row)) + + translated = row[column].encode("idna").decode("utf-8") + logger.debug("translated='%s',row[%s]='%s'", translated, column, row[column]) + + if translated != row[column]: + logger.info("Translated row[%s]='%s' to '%s'", column, row[column], translated) + if is_registered(translated, True): + logger.warning("Deleting row[%s]='%s' as translated='%s' already exist", column, row[column], translated) + database.cursor.execute(f"DELETE FROM instances WHERE {column} = ? LIMIT 1", [row[column]]) + else: + logger.debug("Updating row[%s]='%s' to translated='%s' ...", column, row[column], translated) + database.cursor.execute(f"UPDATE instances SET {column} = ? WHERE {column} = ? LIMIT 1", [translated, row[column]]) + + logger.debug("Invoking commit() ...") + database.connection.commit() + + logger.debug("EXIT!")