import validators
from fba import csrf
+from fba import utils
from fba.helpers import blacklist
from fba.helpers import config
if instance == "":
logger.warning(f"Empty instance after tidyup.domain(), domain='{domain}'")
continue
- elif not validators.domain(instance.split("/")[0]):
- logger.warning(f"Bad instance='{instance}' from domain='{domain}',origin='{origin}'")
- continue
- elif instance.endswith(".arpa"):
- logger.warning(f"instance='{instance}' is a reversed .arpa domain and should not be used generally.")
- continue
- elif blacklist.is_blacklisted(instance):
- logger.debug("instance is blacklisted:", instance)
+ elif not utils.is_domain_wanted(instance):
+ logger.debug("instance='%s' is not wanted - SKIPPED!", instance)
continue
elif instance.find("/profile/") > 0 or instance.find("/users/") > 0:
- logger.debug(f"instance='{instance}' is a link to a single user profile - SKIPPED!")
- continue
- elif instance.endswith(".tld"):
- logger.debug(f"instance='{instance}' is a fake domain - SKIPPED!")
+ logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance)
continue
elif not instances.is_registered(instance):
logger.debug("Adding new instance:", instance, domain)
headers = tuple()
try:
- logger.debug(f"Checking CSRF for domain='{domain}'")
+ logger.debug("Checking CSRF for domain='%s'", domain)
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
logger.debug("Added instance(s) to peers")
else:
message = "JSON response does not contain 'federated_instances' or 'error_message'"
- logger.warning(f"{message},domain='{domain}'")
+ logger.warning("message='%s',domain='%s'", message, domain)
instances.set_last_error(domain, message)
elif isinstance(data["json"], list):
- # DEBUG print("DEBUG: Querying API was successful:", domain, len(data['json']))
+ logger.debug("Querying API was successful: domain='%s',data[json]()=%d", domain, len(data['json']))
peers = data["json"]
else:
- logger.warning(f"Cannot parse data[json][]='{type(data['json'])}'")
+ logger.warning("Cannot parse data[json][]='%s'", type(data['json']))
logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
instances.set_total_peers(domain, peers)
data = dict()
try:
- logger.debug(f"Checking CSRF for domain='{domain}'")
+ logger.debug("Checking CSRF for domain='%s'", domain)
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
logger.warning(f"Exception '{type(exception)}' during checking CSRF (nodeinfo,{__name__}) - EXIT!")
headers = tuple()
try:
- logger.debug(f"Checking CSRF for domain='{domain}'")
+ logger.debug("Checking CSRF for domain='%s'", domain)
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_wellknown_nodeinfo,{__name__}) - EXIT!")
url = f"https://{domain}{url}"
components = urlparse(url)
- if not validators.domain(components.netloc):
- logger.warning(f"components.netloc='{components.netloc}' is not a valid domain - SKIPPED!")
- continue
- elif domain.endswith(".arpa"):
- logger.warning("domain='%s' is a domain for reversed IP addresses - SKIPPED!", domain)
- continue
- elif domain.endswith(".tld"):
- logger.warning("domain='%s' is a fake domain - SKIPPED!", domain)
- continue
- elif blacklist.is_blacklisted(components.netloc):
- logger.debug(f"components.netloc='{components.netloc}' is blacklisted - SKIPPED!")
+ if not utils.is_domain_wanted(components.netloc):
+ logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
continue
logger.debug("Fetching nodeinfo from:", url)
logger.debug(f"Fetching path='{path}' from '{domain}' ...")
response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
- logger.debug("domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text))
+ logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.status_code < 300 and response.text.find("<html") > 0:
logger.debug(f"Parsing response.text()={len(response.text)} Bytes ...")
+
doc = bs4.BeautifulSoup(response.text, "html.parser")
+ logger.debug("doc[]='%s'", type(doc))
- logger.debug("doc[]:", type(doc))
generator = doc.find("meta", {"name" : "generator"})
site_name = doc.find("meta", {"property": "og:site_name"})
- logger.debug(f"generator='{generator}',site_name='{site_name}'")
+ logger.debug("generator[]='%s',site_name[]='%s'", type(generator), type(site_name))
if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
logger.debug("Found generator meta tag:", domain)
software = tidyup.domain(generator.get("content"))
+
logger.debug("software[%s]='%s'", type(software), software)
if software is not None and software != "":
logger.info("domain='%s' is generated by '%s'", domain, software)
elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
logger.debug("Found property=og:site_name:", domain)
software = tidyup.domain(site_name.get("content"))
+
logger.debug("software[%s]='%s'", type(software), software)
if software is not None and software != "":
logger.info("domain='%s' has og:site_name='%s'", domain, software)
logger.debug("software[]='%s'", type(software))
if isinstance(software, str) and software == "":
- logger.debug(f"Corrected empty string to None for software of domain='{domain}'")
+ logger.debug("Corrected empty string to None for software of domain='%s'", domain)
software = None
elif isinstance(software, str) and ("." in software or " " in software):
logger.debug(f"software='{software}' may contain a version number, domain='{domain}', removing it ...")
logger.debug("domain='%s',reason='%s'", domain, reason)
- if not validators.domain(domain.split("/")[0]):
- logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
- continue
- elif domain.endswith(".arpa"):
- logger.warning("domain='%s' is a domain for reversed IP addresses - SKIPPED!", domain)
- continue
- elif domain.endswith(".tld"):
- logger.warning("domain='%s' is a fake domain - SKIPPED!", domain)
- continue
- elif blacklist.is_blacklisted(domain):
+ if not utils.is_domain_wanted(domain):
logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
continue
elif domain == "gab.com/.ai, develop.gab.com":
raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")
logger.debug(f"peer='{peer}' - AFTER!")
- if not validators.domain(peer):
- logger.warning(f"peer='{peer}' is not a valid domain - SKIPPED!")
- continue
- elif peer.endswith(".arpa"):
- logger.warning(f"peer='{peer}' is a domain for reversed IP addresses -SKIPPED!")
- continue
- elif peer.endswith(".tld"):
- logger.warning(f"peer='{peer}' is a fake domain - SKIPPED!")
- continue
- elif blacklist.is_blacklisted(peer):
- logger.debug(f"peer='{peer}' is blacklisted - SKIPPED!")
+ if not utils.is_domain_wanted(peer):
+ logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
continue
logger.debug(f"Adding peer='{peer}' ...")