From 51517e688cf189e49d5b603b1413f3594870d90a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 28 Nov 2023 17:12:38 +0100 Subject: [PATCH] Continued: - cached function software.alias() - moved cached initialization to file header --- fba/helpers/domain.py | 30 +++++++++++++++++------------- fba/helpers/software.py | 14 ++++++++++++++ 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/fba/helpers/domain.py b/fba/helpers/domain.py index 9ab00d0..da553d2 100644 --- a/fba/helpers/domain.py +++ b/fba/helpers/domain.py @@ -29,7 +29,16 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) # In-function cache -_cache = {} +_cache = { + # Cache for function is_in_url() + "is_in_url": {}, + + # Cache for function is_wanted() + "is_wanted": {}, + + # Cache for function raise_on() + "raise_on": {}, +} def raise_on(domain: str): logger.debug("domain='%s' - CALLED!", domain) @@ -38,7 +47,7 @@ def raise_on(domain: str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") - elif "raise_on" in _cache and domain in _cache["raise_on"]: + elif domain in _cache["raise_on"]: logger.debug("Returning cached is_found='%s' - EXIT!", _cache["raise_on"][domain]) return _cache["raise_on"][domain] elif domain.lower() != domain: @@ -53,9 +62,6 @@ def raise_on(domain: str): raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!") elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - elif not "raise_on" in _cache: - logger.debug("Initializing cache for function 'raise_on' ...") - _cache["raise_on"] = {} _cache["raise_on"][domain] = True logger.debug("EXIT!") @@ -68,12 +74,9 @@ def is_in_url(domain: str, url: str) -> bool: raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'") elif url == "": raise ValueError("Parameter 'url' is empty") - elif "is_in_url" in _cache and domain + url in _cache["is_in_url"]: + elif domain + url in _cache["is_in_url"]: logger.debug("Returning cached is_found='%s' - EXIT!", _cache["is_in_url"][domain + url]) return _cache["is_in_url"][domain + url] - elif "is_in_url" not in _cache: - logger.debug("Initializing cache for function 'is_in_url' ...") - _cache["is_in_url"] = {} punycode = domain.encode("idna").decode("utf-8") @@ -81,6 +84,8 @@ def is_in_url(domain: str, url: str) -> bool: logger.debug("components[]='%s',punycode='%s'", type(components), punycode) is_found = (punycode in [components.netloc, components.hostname]) + + # Set cache _cache["is_in_url"][domain + url] = is_found logger.debug("is_found='%s' - EXIT!", is_found) @@ -93,12 +98,9 @@ def is_wanted(domain: str) -> bool: raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") - elif "is_wanted" in _cache and domain in _cache["is_wanted"]: + elif domain in _cache["is_wanted"]: logger.debug("Returning cached is_found='%s' - EXIT!", _cache["is_wanted"][domain]) return _cache["is_wanted"][domain] - elif "is_wanted" not in _cache: - logger.debug("Initializing cache for function 'is_wanted' ...") - _cache["is_wanted"] = {} wanted = True if domain.lower() != domain: @@ -129,6 +131,8 @@ def is_wanted(domain: str) -> bool: logger.debug("domain='%s' is a tag", domain) wanted = False + # Set cache _cache["is_wanted"][domain] = wanted + logger.debug("wanted='%s' - EXIT!", wanted) return wanted diff --git a/fba/helpers/software.py b/fba/helpers/software.py index f224ba4..3eca4fa 100644 --- a/fba/helpers/software.py +++ b/fba/helpers/software.py @@ -29,6 +29,12 @@ relays = [ "pub-relay" ] +# In-function cache +_cache = { + # Cache for function alias() + "alias" : {}, +} + def alias(software: str) -> str: logger.debug("software='%s'- CALLED!", software) @@ -36,6 +42,11 @@ def alias(software: str) -> str: raise ValueError(f"software[]='{type(software)}' is not type 'str'") elif software == "": raise ValueError("Parameter 'software' is empty") + elif software in _cache["alias"]: + logger.debug("Returning cached value='%s' for function 'alias' - EXIT!", _cache["alias"][software]) + return _cache["alias"][software] + + key = software logger.debug("software='%s'- BEFORE!", software) software = tidyup.domain(software) @@ -126,6 +137,9 @@ def alias(software: str) -> str: logger.debug("software='%s' is being cleaned up further ...") software = software.rstrip("!").strip() + # Set cache + _cache["alias"][key] = software + logger.debug("software[%s]='%s' - EXIT!", type(software), software) return software -- 2.39.5