logger = logging.getLogger(__name__)
# In-function cache
-_cache = {}
+_cache = {
+ # is_in_url(): maps the concatenated string domain + url to its boolean result
+ "is_in_url": {},
+
+ # is_wanted(): maps a domain to the 'wanted' boolean computed for it
+ "is_wanted": {},
+
+ # raise_on(): maps a domain to True once the function has stored it
+ "raise_on": {},
+}
def raise_on(domain: str):
logger.debug("domain='%s' - CALLED!", domain)
raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'")
elif domain == "":
raise ValueError("Parameter 'domain' is empty")
- elif "raise_on" in _cache and domain in _cache["raise_on"]:
+ elif domain in _cache["raise_on"]:
logger.debug("Returning cached is_found='%s' - EXIT!", _cache["raise_on"][domain])
return _cache["raise_on"][domain]
elif domain.lower() != domain:
raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- elif not "raise_on" in _cache:
- logger.debug("Initializing cache for function 'raise_on' ...")
- _cache["raise_on"] = {}
_cache["raise_on"][domain] = True
logger.debug("EXIT!")
raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'")
elif url == "":
raise ValueError("Parameter 'url' is empty")
- elif "is_in_url" in _cache and domain + url in _cache["is_in_url"]:
+ elif domain + url in _cache["is_in_url"]:
logger.debug("Returning cached is_found='%s' - EXIT!", _cache["is_in_url"][domain + url])
return _cache["is_in_url"][domain + url]
- elif "is_in_url" not in _cache:
- logger.debug("Initializing cache for function 'is_in_url' ...")
- _cache["is_in_url"] = {}
punycode = domain.encode("idna").decode("utf-8")
logger.debug("components[]='%s',punycode='%s'", type(components), punycode)
is_found = (punycode in [components.netloc, components.hostname])
+
+ # Set cache
_cache["is_in_url"][domain + url] = is_found
logger.debug("is_found='%s' - EXIT!", is_found)
raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'")
elif domain == "":
raise ValueError("Parameter 'domain' is empty")
- elif "is_wanted" in _cache and domain in _cache["is_wanted"]:
+ elif domain in _cache["is_wanted"]:
logger.debug("Returning cached is_found='%s' - EXIT!", _cache["is_wanted"][domain])
return _cache["is_wanted"][domain]
- elif "is_wanted" not in _cache:
- logger.debug("Initializing cache for function 'is_wanted' ...")
- _cache["is_wanted"] = {}
wanted = True
if domain.lower() != domain:
logger.debug("domain='%s' is a tag", domain)
wanted = False
+ # Set cache
_cache["is_wanted"][domain] = wanted
+
logger.debug("wanted='%s' - EXIT!", wanted)
return wanted
"pub-relay"
]
+# Module-level memoization storage; one pre-created sub-dict per caching function
+_cache = {
+ # alias(): maps the raw 'software' argument to its cleaned-up result
+ "alias" : {},
+}
+
def alias(software: str) -> str:
logger.debug("software='%s'- CALLED!", software)
raise ValueError(f"software[]='{type(software)}' is not type 'str'")
elif software == "":
raise ValueError("Parameter 'software' is empty")
+ elif software in _cache["alias"]:
+ logger.debug("Returning cached value='%s' for function 'alias' - EXIT!", _cache["alias"][software])
+ return _cache["alias"][software]
+
+ key = software
logger.debug("software='%s'- BEFORE!", software)
software = tidyup.domain(software)
logger.debug("software='%s' is being cleaned up further ...")
software = software.rstrip("!").strip()
+ # Set cache
+ _cache["alias"][key] = software
+
logger.debug("software[%s]='%s' - EXIT!", type(software), software)
return software