elif row["software"] is not None and software_helper.is_relay(row["software"]) and instances.is_registered(row["domain"]):
logger.warning("row[domain]='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"])
continue
- elif not args.force and not args.software in [None, ""]and instances.is_recent(row["domain"]):
+ elif not args.force and not args.software in [None, ""] and instances.is_recent(row["domain"]):
logger.debug("row[domain]='%s' has recently been crawled - SKIPPED!", row["domain"])
continue
raise ValueError("Parameter 'domain' is empty")
elif domain.lower() != domain:
raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
+ elif "?" in domain:
+ raise ValueError(f"Parameter domain='{domain}' contains a question-mark")
elif not validators.domain(domain.split("/")[0]):
raise ValueError(f"domain='{domain}' is not a valid domain")
elif domain.endswith(".onion"):
logger.debug("instance='%s' - BEFORE!", instance)
instance = instance.encode("idna").decode("utf-8")
+ instance = instance.split("?")[0]
logger.debug("instance='%s' - AFTER!", instance)
if not domain_helper.is_wanted(instance):
)
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
- response_url = response.url.split("#")[0], response_url)
- logger.debug("response_url='%s'"
+ response_url = response.url.split("#")[0]
+ logger.debug("response_url='%s'", response_url)
if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url):
logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
_set_data("original_software", domain, software)
logger.debug("EXIT!")
-
def set_software(domain: str, software: str):
logger.debug("domain='%s',software='%s' - CALLED!", domain, software)
domain_helper.raise_on(domain)