import logging
+from urllib.parse import urlparse
+
import validators
# Configure the logging system with INFO as the root logger's threshold.
logging.basicConfig(level=logging.INFO)
def raise_on(domain: str):
    """Validate ``domain`` and raise ``ValueError`` when it is rejected.

    The checks run in order: ``domain`` must be a ``str``, must not be
    empty, and must equal its own lower-cased form. Nothing is returned
    when every check passes.

    Args:
        domain: Domain name to validate.

    Raises:
        ValueError: If ``domain`` is not a ``str``, is an empty string, or
            differs from ``domain.lower()``.
    """
    # NOTE(review): `logger` is not defined in the visible part of this
    # file; presumably a module-level logger exists elsewhere - confirm.
    logger.debug("domain='%s' - CALLED!", domain)

    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        # NOTE(review): the message talks about a "fake domain" while the
        # condition only checks for non-lower-case input. Further checks may
        # have been elided between these two lines - verify against the
        # complete file before changing the text.
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")

    logger.debug("EXIT!")
+
def is_in_url(domain: str, url: str) -> bool:
    """Report whether ``domain`` is the network location or host of ``url``.

    ``domain`` is first validated with ``raise_on()`` and then converted to
    its IDNA (ASCII-compatible) form. The result is compared for equality
    with both the ``netloc`` and the ``hostname`` component that
    ``urlparse()`` extracts from ``url``.

    Args:
        domain: Domain name to look for.
        url: URL whose network location is inspected.

    Returns:
        ``True`` when the IDNA-encoded domain equals ``netloc`` or
        ``hostname`` of the parsed URL, ``False`` otherwise.

    Raises:
        ValueError: If ``raise_on()`` rejects ``domain``, or if ``url`` is
            not a ``str`` or is an empty string.
    """
    logger.debug("domain='%s',url='%s' - CALLED!", domain, url)
    raise_on(domain)

    # Guard clauses: reject unusable url arguments before parsing.
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'")
    if url == "":
        raise ValueError("Parameter 'url' is empty")

    # The IDNA codec yields ASCII bytes; decode them back into a str so the
    # value can be compared with the parsed URL components.
    encoded_domain = domain.encode("idna").decode("utf-8")

    parsed = urlparse(url)
    logger.debug("components[]='%s',punycode='%s'", type(parsed), encoded_domain)

    # Compare against both components: netloc is the raw authority part,
    # hostname is the host as normalised by urlparse (may be None).
    candidates = (parsed.netloc, parsed.hostname)
    matched = encoded_domain in candidates

    logger.debug("is_found='%s' - EXIT!", matched)
    return matched