- WordPress is a peer list provider and can be used to discover more "peers",
a.k.a. instances
- The URL in the response object can differ from the requested one, so it
needs to be revalidated
- Invalid redirect URLs are now logged with a WARNING-level message
database.cursor.execute(
"SELECT domain, origin, software \
FROM instances \
-WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb', 'smithereen', 'vebinet', 'toki', 'snac', 'biblioreads') \
+WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb', 'smithereen', 'vebinet', 'toki', 'snac', 'biblioreads', 'wordpress') \
ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC"
)
import bs4
import reqto
import requests
+import validators
from fba.helpers import blacklist
from fba.helpers import config
)
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
- if response.ok and response.status_code == 200 and response.text.strip() != "" and response.text.find("<html") >= 0 and domain_helper.is_in_url(domain, response.url.split("#")[0]):
+ response_url = response.url.split("#")[0]
+ logger.debug("response_url='%s'", response_url)
+
+ if response.ok and response.status_code == 200 and response.text.strip() != "" and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url):
# Save cookies
logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
cookies.store(domain, response.cookies.get_dict())
if tag is not None:
logger.debug("Adding CSRF token='%s' for domain='%s'", tag["content"], domain)
reqheaders["X-CSRF-Token"] = tag["content"]
- elif not domain_helper.is_in_url(domain, response.url.split("#")[0]):
+ elif not validators.url(response_url):
+ logger.warning("response_url='%s' is not valid - Raising exception ...", response_url)
+
+ message = f"Redirect from domain='{domain}' to response_url='{response_url}'"
+ instances.set_last_error(domain, message)
+ raise requests.exceptions.TooManyRedirects(message)
+ elif not domain_helper.is_in_url(domain, response_url):
logger.warning("domain='%s' doesn't match with response.url='%s', maybe redirect to other domain?", domain, response.url)
- message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
+ message = f"Redirect from domain='{domain}' to response_url='{response_url}'"
instances.set_last_error(domain, message)
raise requests.exceptions.TooManyRedirects(message)
allow_redirects=True
)
- response_url = response.url.split("#")[0]
+ logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+ response_url = response.url.split("#")[0]
+ logger.debug("response_url='%s'", response_url)
- logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d,response_url='%s'", response.ok, response.status_code, len(response.text), response_url)
if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url):
logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
doc = bs4.BeautifulSoup(response.text, "html.parser")
if software is not None and software != "":
logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
instances.set_detection_mode(domain, "SITE_NAME")
- elif validators.url(response_url) and not domain_helper.is_in_url(domain, response_url):
+ elif not validators.url(response_url):
+ logger.warning("response_url='%s' is not valid - Raising exception ...", response_url)
+
+ message = f"Redirect from domain='{domain}' to response_url='{response_url}'"
+ instances.set_last_error(domain, message)
+ raise requests.exceptions.TooManyRedirects(message)
+ elif not domain_helper.is_in_url(domain, response_url):
logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)
components = urlparse(response.url)