+ logger.debug("Checking %d blockdict record(s) ...", len(blockdict))
+ for block in blockdict:
+ logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
+ if block["blocked"] == blocked:
+ logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
+ block["reason"] = reason
+ else:
+ logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)
+
+ if not found:
+ logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
+ blocklist = fetch_blocks_from_about(domain)
+
+ logger.debug("blocklist()=%d", len(blocklist))
+ if len(blocklist) > 0:
+ logger.info("Checking %d different blocklist(s) ...", len(blocklist))
+ for block_level in blocklist:
+ logger.debug("block_level='%s'", block_level)
+ rows = blocklist[block_level]
+
+ logger.debug("rows[%s]()=%d'", type(rows), len(rows))
+ for block in rows:
+ logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
+ blockdict.append({
+ "blocker" : domain,
+ "blocked" : block["blocked"],
+ "reason" : block["reason"],
+ "block_level": block_level,
+ })
+
+ logger.debug("blockdict()=%d - EXIT!", len(blockdict))
+ return blockdict
+
+def fetch_blocks_from_about(domain: str) -> dict:
+ logger.debug("domain='%s' - CALLED!", domain)
+ domain_helper.raise_on(domain)
+
+ if blacklist.is_blacklisted(domain):
+ raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
+ elif not instances.is_registered(domain):
+ raise Exception(f"domain='{domain}' is not registered but function is invoked.")
+
+ logger.debug("Fetching mastodon blocks from domain='%s'", domain)
+ doc = None
+ for path in ["/instance/about/index.html"]:
+ try:
+ # Reset doc to None before fetching this path
+ doc = None
+
+ logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
+ response = network.fetch_response(
+ domain,
+ path,
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ )
+
+ logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+ if not response.ok or response.text.strip() == "":
+ logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
+ continue
+
+ logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
+ doc = bs4.BeautifulSoup(
+ response.text,
+ "html.parser",
+ )
+
+ logger.debug("doc[]='%s'", type(doc))
+ if doc.find("h2") is not None:
+ logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
+ break
+
+ except network.exceptions as exception:
+ logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
+ instances.set_last_error(domain, exception)
+ break
+
+ blocklist = {
+ "reject" : [],
+ "filtered_media": [],
+ "followers_only": [],
+ "silenced" : [],
+ "media_nsfw" : [],
+ "media_removal" : [],
+ "federated_timeline_removal": [],
+ }
+
+ logger.debug("doc[]='%s'", type(doc))
+ if doc is None:
+ logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
+ return list()
+
+ headers = doc.find_all("h2")
+
+ logger.debug("headers[]='%s'", type(headers))
+ if headers is None:
+ logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
+ return list()
+
+ logger.info("Checking %d headers ...", len(headers))
+ for header in headers:
+ logger.debug("header[%s]='%s'", type(header), header)
+ block_level = tidyup.reason(header.text).lower()
+
+ logger.debug("block_level='%s' - BEFORE!", block_level)
+ if block_level in language_mapping:
+ logger.debug("block_level='%s' - FOUND!", block_level)
+ block_level = language_mapping[block_level].lower()
+ else:
+ logger.warning("block_level='%s' not found in language mapping table", block_level)
+
+ logger.debug("block_level='%s - AFTER!'", block_level)
+ if block_level in blocklist:
+ # Use find_next("table") rather than find_next_siblings() to account for instances that e.g. hide lists in a dropdown menu
+ logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
+ for line in header.find_next("table").find_all("tr")[1:]:
+ logger.debug("line[]='%s'", type(line))
+ blocked = line.find_all("td")[0].text
+ logger.debug("blocked='%s'", blocked)
+
+ blocked = tidyup.domain(blocked) if blocked != "" else None
+ reason = tidyup.reason(line.find_all("td")[1].text)
+ logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
+
+ if blocked is None or blocked == "":
+ logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level)
+ continue
+ elif not domain_helper.is_wanted(blocked):
+ logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
+ continue