import logging
+from functools import lru_cache
from urllib.parse import urlparse
import validators
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
-# In-function cache
-_cache = {
- # Cache for function is_in_url()
- "is_in_url": {},
-
- # Cache for function is_wanted()
- "is_wanted": {},
-
- # Cache for function raise_on()
- "raise_on": {},
-}
-
def raise_on(domain: str):
logger.debug("domain='%s' - CALLED!", domain)
raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'")
elif domain == "":
raise ValueError("Parameter 'domain' is empty")
- elif domain in _cache["raise_on"]:
- logger.debug("Returning cached raised_on='%s' - EXIT!", _cache["raise_on"][domain])
- return _cache["raise_on"][domain]
elif domain.lower() != domain:
raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
elif not validators.domain(domain.split("/")[0]):
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- _cache["raise_on"][domain] = True
logger.debug("EXIT!")
+@lru_cache
def is_in_url(domain: str, url: str) -> bool:
logger.debug("domain='%s',url='%s' - CALLED!", domain, url)
raise_on(domain)
raise ValueError("Parameter 'url' is empty")
elif not validators.url(url):
raise ValueError(f"Parameter url='{url}' is not a valid URL")
- elif domain + url in _cache["is_in_url"]:
- logger.debug("Returning cached is_in_url='%s' - EXIT!", _cache["is_in_url"][domain + url])
- return _cache["is_in_url"][domain + url]
punycode = domain.encode("idna").decode("utf-8")
is_found = (punycode in [components.netloc, components.hostname])
- # Set cache
- _cache["is_in_url"][domain + url] = is_found
-
logger.debug("is_found='%s' - EXIT!", is_found)
return is_found
+@lru_cache
def is_wanted(domain: str) -> bool:
logger.debug("domain='%s' - CALLED!", domain)
raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'")
elif domain == "":
raise ValueError("Parameter 'domain' is empty")
- elif domain in _cache["is_wanted"]:
- logger.debug("Returning cached is_found='%s' - EXIT!", _cache["is_wanted"][domain])
- return _cache["is_wanted"][domain]
wanted = True
if domain.lower() != domain:
logger.debug("domain='%s' is a tag", domain)
wanted = False
- # Set cache
- _cache["is_wanted"][domain] = wanted
-
logger.debug("wanted='%s' - EXIT!", wanted)
return wanted
import logging
+from functools import lru_cache
+
from fba.helpers import tidyup
logging.basicConfig(level=logging.INFO)
"pub-relay"
]
-# In-function cache
-_cache = {
- # Cache for function alias()
- "alias" : {},
-}
-
+@lru_cache
def alias(software: str) -> str:
logger.debug("software='%s'- CALLED!", software)
raise ValueError(f"software[]='{type(software)}' is not type 'str'")
elif software == "":
raise ValueError("Parameter 'software' is empty")
- elif software in _cache["alias"]:
- logger.debug("Returning cached alias='%s' - EXIT!", _cache["alias"][software])
- return _cache["alias"][software]
key = software
logger.debug("software='%s' is being cleaned up further ...")
software = software.rstrip("!").strip()
- # Set cache
- _cache["alias"][key] = software
-
logger.debug("software[%s]='%s' - EXIT!", type(software), software)
return software