X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;ds=sidebyside;f=fba%2Fhelpers%2Fdomain.py;h=39fbb7797acee74c7c5207ed0b3b60c507276b2f;hb=aabcda256a3ba353453eb96c75918270a0aba1b7;hp=2df92e8f9733005978e0f9ce5b6ce951009fc1c2;hpb=f6902e370dd0c156d66fac95160f0b9db0daf5a7;p=fba.git diff --git a/fba/helpers/domain.py b/fba/helpers/domain.py index 2df92e8..39fbb77 100644 --- a/fba/helpers/domain.py +++ b/fba/helpers/domain.py @@ -16,6 +16,7 @@ import logging +from functools import lru_cache from urllib.parse import urlparse import validators @@ -28,18 +29,6 @@ from fba.models import instances logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -# In-function cache -_cache = { - # Cache for function is_in_url() - "is_in_url": {}, - - # Cache for function is_wanted() - "is_wanted": {}, - - # Cache for function raise_on() - "raise_on": {}, -} - def raise_on(domain: str): logger.debug("domain='%s' - CALLED!", domain) @@ -47,9 +36,6 @@ def raise_on(domain: str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") - elif domain in _cache["raise_on"]: - logger.debug("Returning cached raised_on='%s' - EXIT!", _cache["raise_on"][domain]) - return _cache["raise_on"][domain] elif domain.lower() != domain: raise ValueError(f"Parameter domain='{domain}' must be all lower-case") elif not validators.domain(domain.split("/")[0]): @@ -63,9 +49,9 @@ def raise_on(domain: str): elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - _cache["raise_on"][domain] = True logger.debug("EXIT!") +@lru_cache def is_in_url(domain: str, url: str) -> bool: logger.debug("domain='%s',url='%s' - CALLED!", domain, url) raise_on(domain) @@ -78,9 +64,6 @@ def is_in_url(domain: str, url: str) -> bool: raise ValueError("Parameter 'url' is empty") elif not validators.url(url): raise ValueError(f"Parameter url='{url}' is not a valid URL") - elif domain + url in _cache["is_in_url"]: - logger.debug("Returning cached is_in_url='%s' - EXIT!", _cache["is_in_url"][domain + url]) - return _cache["is_in_url"][domain + url] punycode = domain.encode("idna").decode("utf-8") @@ -89,12 +72,10 @@ def is_in_url(domain: str, url: str) -> bool: is_found = (punycode in [components.netloc, components.hostname]) - # Set cache - _cache["is_in_url"][domain + url] = is_found - logger.debug("is_found='%s' - EXIT!", is_found) return is_found +@lru_cache def is_wanted(domain: str) -> bool: logger.debug("domain='%s' - CALLED!", domain) @@ -102,9 +83,6 @@ def is_wanted(domain: str) -> bool: raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") - elif domain in _cache["is_wanted"]: - logger.debug("Returning cached is_found='%s' - EXIT!", _cache["is_wanted"][domain]) - return _cache["is_wanted"][domain] wanted = True if domain.lower() != domain: @@ -135,8 +113,5 @@ def is_wanted(domain: str) -> bool: logger.debug("domain='%s' is a tag", domain) wanted = False - # Set cache - _cache["is_wanted"][domain] = wanted - logger.debug("wanted='%s' - EXIT!", wanted) return wanted