From: Roland Häder Date: Sun, 25 Jun 2023 10:32:07 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=2d08bbdd47e0ce0c069f20d3773c88f2bead75ee;p=fba.git Continued: - if lemmy's API doesn't work or returns zero rows, try /instances instead --- diff --git a/fba/http/federation.py b/fba/http/federation.py index ff7cf5e..7d5d97c 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -82,8 +82,8 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: logger.debug("Updating last_instance_fetch for domain='%s' ...", domain) instances.set_last_instance_fetch(domain) - logger.debug("Fetching instances for domain='%s',software='%s'", domain, software) - peerlist = fetch_peers(domain, software) + logger.debug("Fetching instances for domain='%s',software='%s',origin='%s'", domain, software, origin) + peerlist = fetch_peers(domain, software, origin) logger.debug("peerlist[]='%s'", type(peerlist)) if isinstance(peerlist, list): @@ -137,8 +137,8 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: logger.debug("EXIT!") -def fetch_peers(domain: str, software: str) -> list: - logger.debug("domain='%s',software='%s' - CALLED!", domain, software) +def fetch_peers(domain: str, software: str, origin: str) -> list: + logger.debug("domain='%s',software='%s',origin='%s' - CALLED!", domain, software, origin) domain_helper.raise_on(domain) if not isinstance(software, str) and software is not None: @@ -148,8 +148,8 @@ def fetch_peers(domain: str, software: str) -> list: logger.debug("Invoking misskey.fetch_peers(%s) ...", domain) return misskey.fetch_peers(domain) elif software == "lemmy": - logger.debug("Invoking lemmy.fetch_peers(%s) ...", domain) - return lemmy.fetch_peers(domain) + logger.debug("Invoking lemmy.fetch_peers(%s,%s) ...", domain, origin) + return lemmy.fetch_peers(domain, origin) elif software == "peertube": logger.debug("Invoking peertube.fetch_peers(%s) ...", 
def fetch_instances(domain: str, origin: str) -> list:
    """Scrape peer domains from the HTML page served at ``/instances``.

    The page is parsed with BeautifulSoup: for every ``<h5>`` header the next
    ``<ul>`` or ``<table>`` element is looked up and the text of each ``<a>``
    inside it is treated as a peer candidate.

    Args:
        domain: Domain to fetch ``/instances`` from. It is handed to
            ``domain_helper.raise_on()`` before anything else happens.
        origin: Accepted for parity with the caller's signature; this function
            only logs it.

    Returns:
        A list of peers without duplicates. Each entry is the result of
        ``tidyup.domain()``, is non-empty and passed
        ``utils.is_domain_wanted()``. When one of ``network.exceptions`` is
        raised, the error is recorded through ``instances.set_last_error()``
        and the peers collected so far are returned.
    """
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    try:
        # This is an HTML page, not a JSON API - it is parsed with bs4 below
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            headers = doc.find_all("h5")
            logger.debug("Checking %d headers ...", len(headers))
            for header in headers:
                logger.debug("header[%s]='%s'", type(header), header)

                # find_next() returns None when no <ul>/<table> follows the
                # header; calling find_all() on that would raise an
                # AttributeError which is not part of network.exceptions.
                container = header.find_next(["ul", "table"])
                if container is None:
                    logger.debug("header='%s' is not followed by a list or table - SKIPPED!", header)
                    continue

                rows = container.find_all("a")
                logger.debug("Found %d peer(s) ...", len(rows))
                for tag in rows:
                    logger.debug("tag[]='%s'", type(tag))

                    # An empty <a></a> has no children, contents[0] would
                    # raise an IndexError.
                    if not tag.contents:
                        logger.debug("tag='%s' has no contents - SKIPPED!", tag)
                        continue

                    peer = tidyup.domain(tag.contents[0])
                    logger.debug("peer='%s'", peer)

                    if peer == "":
                        logger.debug("peer is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(peer):
                        logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                        continue
                    elif peer in peers:
                        logger.debug("peer='%s' already added - SKIPPED!", peer)
                        continue

                    logger.debug("Appending peer='%s' ...", peer)
                    peers.append(peer)

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers