+
+ logger.debug("Marking domain='%s' as successfully handled ...", domain)
+ instances.set_success(domain)
+
+ if len(peers) == 0:
+ logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
+ peers = fetch_instances(domain, origin)
+
+ except network.exceptions as exception:
+ logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
+ instances.set_last_error(domain, exception)
+
+ logger.debug("peers()=%d - EXIT!", len(peers))
+ return peers
+
def _find_blocklist_header(doc, domain: str):
    """Locate the heading that introduces the list of blocked instances.

    Searches the ``<div>`` containers matching the known CSS classes, in
    order, and inspects every ``h2``-``h5`` heading inside them. A heading
    matches when the lower-cased string form of its first child is contained
    in the module-level ``translations`` collection.

    Args:
        doc: Parsed ``bs4.BeautifulSoup`` document of the ``/instances`` page.
        domain: Domain being processed; only used for log output.

    Returns:
        The first matching heading tag, or ``None`` when no heading matches.
    """
    for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
        logger.debug("criteria='%s'", criteria)
        containers = doc.find_all("div", criteria)

        logger.debug("Checking %d containers ...", len(containers))
        for container in containers:
            logger.debug("container[]='%s'", type(container))
            for header in container.find_all(["h2", "h3", "h4", "h5"]):
                logger.debug("header[%s]='%s' - BEFORE!", type(header), header)

                # An empty heading (e.g. <h3></h3>) has no children, so
                # indexing contents[0] unguarded would raise IndexError.
                content = str(header.contents[0]) if header.contents else ""
                logger.debug("content[%s]='%s' - AFTER!", type(content), content)

                if content == "":
                    logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                    continue
                elif content.lower() in translations:
                    logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
                    return header

    return None


def fetch_blocks(domain: str) -> list:
    """Fetch the instances blocked by ``domain`` from its ``/instances`` page.

    The page is requested as HTML and parsed with BeautifulSoup. If a heading
    announcing blocked instances is found, every link in the ``<ul>`` or
    ``<table>`` following it is taken as a blocked domain. If no such heading
    exists, the result of ``parse_script(doc, "blocked")`` is used instead.

    Args:
        domain: Domain of the instance whose block list is fetched.

    Returns:
        A list of dicts with the keys ``blocker`` (always ``domain``),
        ``blocked``, ``reason`` (always ``None``) and ``block_level``
        (always ``"reject"``). The list is empty when the request fails,
        the response is not usable, or nothing could be extracted.

    Raises:
        Exception: If ``domain`` is blacklisted or is not registered.
        Whatever ``domain_helper.raise_on()`` raises for an invalid domain.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blocklist = []

    try:
        # The /instances endpoint is fetched as an HTML page (parsed below)
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            found = _find_blocklist_header(doc, domain)

            logger.debug("found[]='%s'", type(found))
            if found is None:
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    blocklist.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": "reject",
                    })

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                return blocklist

            # find_next() returns None when no <ul>/<table> follows the
            # heading; treat that as "no entries" instead of crashing.
            listing = found.find_next(["ul", "table"])
            if listing is None:
                logger.warning("domain='%s' has a blocklist header but no list or table follows it.", domain)
                blocking = []
            else:
                blocking = listing.find_all("a")

            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            for tag in blocking:
                logger.debug("tag[]='%s'", type(tag))
                if not tag.contents:
                    # Anchor without any child node, nothing to extract
                    logger.warning("tag='%s' has no content - SKIPPED!", tag)
                    continue

                # The first child may be a plain string or a nested tag
                # (e.g. <a><span>example.org</span></a>); use its text then.
                first = tag.contents[0]
                text = first if isinstance(first, str) else first.text
                logger.debug("text='%s' - BEFORE!", text)

                blocked = tidyup.domain(text) if text != "" else None
                logger.debug("blocked='%s'", blocked)

                if blocked is None or blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", text)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                blocklist.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": "reject",
                })
        else:
            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
            instances.set_last_error(domain, response)

    except network.exceptions as exception:
        # Network-level failures are recorded for the instance, not re-raised
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist
+
+def fetch_instances(domain: str, origin: str) -> list:
+ logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
+ domain_helper.raise_on(domain)
+
+ if blacklist.is_blacklisted(domain):
+ raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
+
+ peers = list()
+
+ try:
+ # json endpoint for newer mastodongs
+ logger.debug("Fetching /instances from domain='%s'", domain)
+ response = network.fetch_response(
+ domain,
+ "/instances",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ )
+
+ logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+ if response.ok and response.status_code == 200 and response.text != "":
+ logger.debug("Parsing %s Bytes ...", len(response.text))
+
+ doc = bs4.BeautifulSoup(response.text, "html.parser")
+ logger.debug("doc[]='%s'", type(doc))
+
+ for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+ logger.debug("criteria='%s'", criteria)
+ containers = doc.findAll("div", criteria)
+
+ logger.debug("Checking %d containers ...", len(containers))
+ for header in containers:
+ logger.debug("header[%s]='%s'", type(header), header)
+
+ rows = header.find_next(["ul","table"]).findAll("a")
+ logger.debug("Found %d instance(s) ...", len(rows))
+ for tag in rows:
+ logger.debug("tag[]='%s'", type(tag))
+ text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
+ logger.debug("text='%s' - BEFORE!", text)
+
+ peer = tidyup.domain(text) if text != "" else None
+ logger.debug("peer='%s' - AFTER", peer)
+
+ if peer is None or peer == "":
+ logger.warning("peer='%s' is empty, text='%s' - SKIPPED!", peer, text)
+ continue
+ elif not domain_helper.is_wanted(peer):
+ logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
+ continue
+ elif peer in peers:
+ logger.debug("peer='%s' already added - SKIPPED!", peer)
+ continue
+
+ logger.debug("Appending peer='%s' ...", peer)
+ peers.append(peer)
+
+ logger.debug("peers()=%d", len(peers))
+ if len(peers) == 0:
+ logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
+ peers = parse_script(doc)