import logging
+from functools import lru_cache
from urllib.parse import urlparse
import validators
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
-# In-function cache
-_cache = {}
-
def raise_on(domain: str):
    """Trace a guard call for ``domain``.

    As visible here, the function only emits two debug records (entry and
    exit) and implicitly returns ``None``; it neither inspects nor rejects
    ``domain``.

    NOTE(review): the name suggests this should raise on an invalid domain,
    but no such check is visible in this view - confirm against the full file.

    Args:
        domain: Value that is written to the entry debug record.
    """
    # Entry trace; arguments are passed lazily so formatting only happens
    # when DEBUG is actually enabled.
    logger.debug("domain='%s' - CALLED!", domain)

    # Exit trace.
    logger.debug("EXIT!")
@lru_cache
def is_in_url(domain: str, url: str) -> bool:
    """Tell whether ``domain`` is the host part of ``url``.

    The domain is converted to its IDNA (punycode) form and compared against
    both the ``netloc`` and the ``hostname`` of the parsed URL.

    Results are memoized with ``functools.lru_cache``, keyed on
    ``(domain, url)``. On a cache hit the body is not executed again, so
    none of the checks below (including the blacklist lookup) are repeated
    for an argument pair that already produced a result. Calls that raise
    are not cached.

    NOTE(review): ``blacklist`` is not imported in the part of the module
    visible here - confirm it is in scope at module level.

    Args:
        domain: Domain name to look for.
        url: URL whose network location is inspected.

    Returns:
        True if the punycode form of ``domain`` equals the URL's ``netloc``
        or ``hostname``, False otherwise.

    Raises:
        ValueError: If ``domain`` is blacklisted, or ``url`` is not a
            ``str``, is empty, or is rejected by ``validators.url``.
    """
    logger.debug("domain='%s',url='%s' - CALLED!", domain, url)
    raise_on(domain)

    # Guard clauses: each one raises, so they are checked one after another.
    if blacklist.is_blacklisted(domain):
        raise ValueError(f"domain='{domain}' is blacklisted but function was invoked")
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'")
    if url == "":
        raise ValueError("Parameter 'url' is empty")
    if not validators.url(url):
        raise ValueError(f"Parameter url='{url}' is not a valid URL")

    # Parse the URL before its parts are used; without this assignment the
    # lookups on 'components' below fail with NameError.
    components = urlparse(url)

    # Internationalized domain names must be compared in their ASCII
    # (punycode) form, which is what a URL carries in its host part.
    punycode = domain.encode("idna").decode("utf-8")

    logger.debug("components[]='%s',punycode='%s'", type(components), punycode)
    # 'netloc' may carry port/credentials while 'hostname' does not; a match
    # against either one counts.
    is_found = (punycode in [components.netloc, components.hostname])

    logger.debug("is_found='%s' - EXIT!", is_found)
    return is_found
@lru_cache
def is_wanted(domain: str) -> bool:
    """Decide whether ``domain`` is wanted.

    As visible here, a domain is wanted unless it contains upper-case
    characters, i.e. unless ``domain.lower() != domain``.

    Results are memoized with ``functools.lru_cache``, keyed on ``domain``;
    calls that raise are not cached.

    NOTE(review): the debug message calls a mixed-case domain "a tag";
    further checks may exist outside this view - confirm against the full
    file.

    Args:
        domain: Domain name to check.

    Returns:
        True if ``domain`` is already all lower-case, False otherwise.

    Raises:
        ValueError: If ``domain`` is not a ``str`` or is an empty string.
    """
    logger.debug("domain='%s' - CALLED!", domain)

    # The type check has to guard the first raise; without it the function
    # would reject every input.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Wanted by default; the check below can only veto.
    wanted = True
    if domain.lower() != domain:
        logger.debug("domain='%s' is a tag", domain)
        wanted = False

    logger.debug("wanted='%s' - EXIT!", wanted)
    return wanted