).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
- doc = bs4.BeautifulSoup(raw, "html.parser")
+ doc = bs4.BeautifulSoup(raw, features="html.parser")
logger.debug("doc[]='%s'", type(doc))
silenced = doc.find("h3", {"id": "limited_servers"}).find_next("ul").findAll("li")
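For reference, the two call styles are equivalent: bs4's constructor signature is BeautifulSoup(markup="", features=None, ...), so the positional "html.parser" was already being bound to features. Naming the parameter only makes the parser choice explicit. A minimal sketch (the sample HTML is made up):

    import bs4

    raw = "<html><h3 id='limited_servers'>Limited</h3><ul><li>example.tld</li></ul></html>"

    # Identical parse tree to the positional call; the keyword spells out the parser.
    doc = bs4.BeautifulSoup(raw, features="html.parser")
    silenced = doc.find("h3", {"id": "limited_servers"}).find_next("ul").findAll("li")
    print([li.get_text() for li in silenced])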
logger.debug("atom[]='%s'", type(atom))
for entry in atom.entries:
    logger.debug("entry[]='%s'", type(entry))
-    doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
+    doc = bs4.BeautifulSoup(entry.content.value, features="html.parser")
    logger.debug("doc[]='%s'", type(doc))
    elements = doc.findAll("a")
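The loop above walks feed entries and pulls anchors out of each entry's embedded HTML. A rough sketch of that flow, assuming the feed object comes from feedparser (which exposes entry content as a list of content blocks, each carrying a .value string; the feed URL is made up):

    import bs4
    import feedparser

    atom = feedparser.parse("https://example.tld/blocks.atom")  # hypothetical feed URL
    for entry in atom.entries:
        for content in entry.get("content", []):
            doc = bs4.BeautifulSoup(content.value, features="html.parser")
            for element in doc.findAll("a"):
                href = element.get("href")
                if href is not None:
                    print(href)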
if response.ok and response.status_code == 200 and response.text != "":
    logger.debug("Parsing %d Bytes ...", len(response.text))
-    doc = bs4.BeautifulSoup(response.text, "html.parser")
+    doc = bs4.BeautifulSoup(response.text, features="html.parser")
    logger.debug("doc[]='%s'", type(doc))
    rows = doc.findAll("li")
if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url):
    logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
-    doc = bs4.BeautifulSoup(response.text, "html.parser")
+    doc = bs4.BeautifulSoup(response.text, features="html.parser")
    logger.debug("doc[]='%s'", type(doc))
    platform = doc.find("meta", {"property": "og:platform"})
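One detail worth remembering with this lookup: find() returns None when the page has no matching tag, so the og:platform value should be read defensively. A small sketch (the sample HTML is made up):

    import bs4

    html = '<html><head><meta content="mastodon" property="og:platform"></head></html>'
    doc = bs4.BeautifulSoup(html, features="html.parser")

    platform = doc.find("meta", {"property": "og:platform"})
    # find() yields None on a miss, and .get() tolerates a missing attribute
    name = platform.get("content") if platform is not None else None
    print(name)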
).text
logger.debug("Parsing %d Bytes ...", len(raw))
- doc = bs4.BeautifulSoup(raw, "html.parser",)
+ doc = bs4.BeautifulSoup(raw, features="html.parser")
logger.debug("doc[]='%s'", type(doc))
block_tag = doc.find(id="about_blocklist")
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.status_code == 200 and response.text != "":
    logger.debug("Parsing %d Bytes ...", len(response.text))
-    doc = bs4.BeautifulSoup(response.text, "html.parser")
+    doc = bs4.BeautifulSoup(response.text, features="html.parser")
    logger.debug("doc[]='%s'", type(doc))
found = None
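The found = None line above sets up the usual sentinel scan: iterate candidates, keep the first match, and let None signal a miss afterwards. A generic sketch of the pattern (the match predicate is hypothetical):

    import bs4

    doc = bs4.BeautifulSoup("<ul><li>ok</li><li>blocked: example.tld</li></ul>", features="html.parser")

    found = None
    for row in doc.findAll("li"):
        # hypothetical criteria; the real check lives in the surrounding code
        if "blocked" in row.get_text():
            found = row
            break

    if found is None:
        print("no matching row")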
if response.ok and response.status_code == 200 and response.text != "":
    logger.debug("Parsing %d Bytes ...", len(response.text))
-    doc = bs4.BeautifulSoup(response.text, "html.parser")
+    doc = bs4.BeautifulSoup(response.text, features="html.parser")
    logger.debug("doc[]='%s'", type(doc))
    for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
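The criteria loop above tries several class selectors in order, a fallback for instances whose themes render the instance list with different markup. A sketch of the idea (the sample HTML is made up):

    import bs4

    doc = bs4.BeautifulSoup('<div class="container">instances here</div>', features="html.parser")

    container = None
    for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
        container = doc.find("div", criteria)
        if container is not None:
            # first selector that matches wins
            break
    print(container)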
        domain,
        path
    ).text,
-     "html.parser",
+     features="html.parser"
)
-
- if len(doc.find_all("h3")) > 0:
+ logger.debug("doc[]='%s'", type(doc))
+ if doc is None:
+     logger.warning("domain='%s',path='%s' has returned no parseable document! - BREAK!", domain, path)
+     break
+ elif len(doc.find_all("h3")) > 0:
    logger.debug("path='%s' had some headlines - BREAK!", path)
    break
instances.set_last_error(domain, exception)
break
+ logger.debug("doc[]='%s'", type(doc))
+ if doc is None:
+     logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
+     return []
+
blocklist = {
    "suspended servers": [],
    "filtered media"   : [],
    "silenced servers" : [],
}
- logger.debug("doc[]='%s'", type(doc))
- if doc is None:
-     logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
-     return []
-
headers = doc.find_all("h3")
logger.info("Checking %d h3 headers ...", len(headers))
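Taken together, the hunks above hoist the doc is None guard so it runs before the blocklist dict is built; previously the dict was created first, so find_all() could still be reached with doc unset when every /about candidate failed. A condensed sketch of the resulting control flow, with a made-up path list and requests standing in for the project's HTTP helper:

    import bs4
    import requests

    def fetch_blocks(domain: str) -> list:
        doc = None
        for path in ["/about/more", "/about"]:  # hypothetical candidate paths
            try:
                response = requests.get(f"https://{domain}{path}", timeout=10)
                doc = bs4.BeautifulSoup(response.text, features="html.parser")
                if doc is None:
                    # defensive guard mirroring the diff; stop probing paths
                    break
                elif len(doc.find_all("h3")) > 0:
                    # this page carries the headlines the scanner looks for
                    break
            except requests.exceptions.RequestException:
                break

        if doc is None:
            # nothing parseable was fetched: bail out before building the blocklist
            return []

        blocklist = {
            "suspended servers": [],
            "filtered media"   : [],
            "silenced servers" : [],
        }

        # collect the list items following each matching h3 header
        for header in doc.find_all("h3"):
            key = header.get_text().strip().lower()
            if key in blocklist:
                ul = header.find_next("ul")
                if ul is not None:
                    blocklist[key] = [li.get_text() for li in ul.findAll("li")]

        # hypothetical return shape; the real function post-processes further
        return [{"reason": key, "domains": rows} for key, rows in blocklist.items()]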
logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
doc = bs4.BeautifulSoup(
    response.text,
-     "html.parser",
+     features="html.parser"
)
logger.debug("doc[]='%s'", type(doc))