logger = logging.getLogger(__name__)
# Per-function result caches, keyed by function name (pre-seeded so lookups need no existence check)
-_cache = {}
+_cache = {
+ # Results of is_in_url(), keyed by the concatenation domain + url
+ "is_in_url": {},
+
+ # Results of is_wanted(), keyed by domain
+ "is_wanted": {},
+
+ # Domains that already passed raise_on() validation (stored value is True)
+ "raise_on": {},
+}
def raise_on(domain: str):
logger.debug("domain='%s' - CALLED!", domain)
raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'")
elif domain == "":
raise ValueError("Parameter 'domain' is empty")
- elif "raise_on" in _cache and domain in _cache["raise_on"]:
- logger.debug("Returning cached is_found='%s' - EXIT!", _cache["raise_on"][domain])
+ elif domain in _cache["raise_on"]:
+ logger.debug("Returning cached raised_on='%s' - EXIT!", _cache["raise_on"][domain])
return _cache["raise_on"][domain]
elif domain.lower() != domain:
raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- elif not "raise_on" in _cache:
- logger.debug("Initializing cache for function 'raise_on' ...")
- _cache["raise_on"] = {}
_cache["raise_on"][domain] = True
logger.debug("EXIT!")
raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'")
elif url == "":
raise ValueError("Parameter 'url' is empty")
- elif "is_in_url" in _cache and domain + url in _cache["is_in_url"]:
- logger.debug("Returning cached is_found='%s' - EXIT!", _cache["is_in_url"][domain + url])
+ elif domain + url in _cache["is_in_url"]:
+ logger.debug("Returning cached is_in_url='%s' - EXIT!", _cache["is_in_url"][domain + url])
return _cache["is_in_url"][domain + url]
- elif "is_in_url" not in _cache:
- logger.debug("Initializing cache for function 'is_in_url' ...")
- _cache["is_in_url"] = {}
punycode = domain.encode("idna").decode("utf-8")
logger.debug("components[]='%s',punycode='%s'", type(components), punycode)
is_found = (punycode in [components.netloc, components.hostname])
+
+ # Set cache
_cache["is_in_url"][domain + url] = is_found
logger.debug("is_found='%s' - EXIT!", is_found)
raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'")
elif domain == "":
raise ValueError("Parameter 'domain' is empty")
- elif "is_wanted" in _cache and domain in _cache["is_wanted"]:
+ elif domain in _cache["is_wanted"]:
logger.debug("Returning cached is_found='%s' - EXIT!", _cache["is_wanted"][domain])
return _cache["is_wanted"][domain]
- elif "is_wanted" not in _cache:
- logger.debug("Initializing cache for function 'is_wanted' ...")
- _cache["is_wanted"] = {}
wanted = True
if domain.lower() != domain:
logger.debug("domain='%s' is a tag", domain)
wanted = False
+ # Set cache
_cache["is_wanted"][domain] = wanted
+
logger.debug("wanted='%s' - EXIT!", wanted)
return wanted