- elif blocked.endswith(".tld"):
- logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
- continue
- elif blacklist.is_blacklisted(blocked):
- logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
- continue
- elif not instances.is_registered(blocked):
- logger.debug("Hash wasn't found, adding:", blocked, domain)
- instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
-
- if not blocks.is_instance_blocked(domain, blocked, "reject"):
- logger.debug("Blocking:", domain, blocked)
- blocks.add_instance(domain, blocked, None, "reject")
-
- found_blocks.append({
- "blocked": blocked,
- "reason" : None
- })
- else:
- logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
- blocks.update_last_seen(domain, blocked, "reject")
-
- logger.debug("Committing changes ...")
- fba.connection.commit()
+
+ logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
+ blocklist.append({
+ "blocker" : domain,
+ "blocked" : blocked,
+ "reason" : None,
+ "block_level": "reject",
+ })
+
+ except network.exceptions as exception:
+ logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
+ instances.set_last_error(domain, exception)
+
+ logger.debug("blocklist()=%d - EXIT!", len(blocklist))
+ return blocklist
+
+def fetch_instances(domain: str, origin: str) -> list:
+ logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
+ domain_helper.raise_on(domain)
+
+ peers = list()
+
+ try:
+ # JSON endpoint for newer Mastodon instances
+ logger.debug("Fetching /instances from domain='%s'", domain)
+ response = network.fetch_response(
+ domain,
+ "/instances",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ )
+
+ logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+ if response.ok and response.status_code < 300 and response.text != "":
+ logger.debug("Parsing %s Bytes ...", len(response.text))
+
+ doc = bs4.BeautifulSoup(response.text, "html.parser")
+ logger.debug("doc[]='%s'", type(doc))
+
+ headers = doc.findAll("h5")
+ logger.debug("Checking %d headers ...", len(headers))
+ for header in headers:
+ logger.debug("header[%s]='%s'", type(header), header)
+
+ rows = header.find_next(["ul","table"]).findAll("a")
+ logger.debug("Found %d blocked instance(s) ...", len(rows))
+ for tag in rows:
+ logger.debug("tag[]='%s'", type(tag))
+ text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
+ peer = tidyup.domain(text)
+ logger.debug("peer='%s'", peer)
+
+ if peer == "":
+ logger.debug("peer is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(peer):
+ logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
+ continue
+ elif peer in peers:
+ logger.debug("peer='%s' already added - SKIPPED!", peer)
+ continue
+
+ logger.debug("Appending peer='%s' ...", peer)
+ peers.append(peer)
+
+ logger.debug("Marking domain='%s' as successfully handled ...", domain)
+ instances.set_success(domain)
+