allow_redirects=True
)
- logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
- if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url.split("#")[0]):
+ response_url = response.url.split("#")[0]
+
+ logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d,response_url='%s'", response.ok, response.status_code, len(response.text), response_url)
+ if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") > 0 and validators.domain(response_url) and domain_helper.is_in_url(domain, response_url):
logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
doc = bs4.BeautifulSoup(response.text, "html.parser")
if software is not None and software != "":
logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
instances.set_detection_mode(domain, "SITE_NAME")
- elif not domain_helper.is_in_url(domain, response.url.split("#")[0]):
+ elif validators.domain(response_url) and not domain_helper.is_in_url(domain, response_url):
logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)
components = urlparse(response.url)