From 9c7760440dc866b2f75bac29d12e6d57c62f9f0c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 11 Jul 2023 08:14:33 +0200 Subject: [PATCH] Continued: - find more blocklists/peer lists from Lemmy by also scanning for (out-dated? class=container) --- fba/networks/lemmy.py | 90 +++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 42 deletions(-) diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py index 1306bb8..18cf2c4 100644 --- a/fba/networks/lemmy.py +++ b/fba/networks/lemmy.py @@ -144,24 +144,27 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list: doc = bs4.BeautifulSoup(response.text, "html.parser") logger.debug("doc[]='%s'", type(doc)) - headers = doc.findAll("div", {"class": "home-instances container-lg"}) found = None - logger.debug("Checking %d header(s) ...", len(headers)) - for header in headers: - logger.debug("header[]='%s'", type(header)) - content = header.contents[0] - - logger.debug("content[%s]='%s'", type(content), content) - if content is None: - logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header) - continue - elif not isinstance(content, str): - logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content)) - continue - elif content.lower() in translations: - logger.debug("Found header with blocked instances - BREAK!") - found = header - break + for container in [{"class": "home-instances container-lg"}, {"class": "container"}]: + logger.debug("container='%s'", container) + headers = doc.findAll("div", container) + + logger.debug("Checking %d header(s) ...", len(headers)) + for header in headers: + logger.debug("header[]='%s'", type(header)) + content = header.find(["h2", "h3", "h4", "h5"]).contents[0] + + logger.debug("content[%s]='%s'", type(content), content) + if content is None: + logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header) + continue + elif not isinstance(content, str): + logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content)) + continue + elif content.lower() in translations: + logger.debug("Found header with blocked instances - BREAK!") + found = header + break logger.debug("found[]='%s'", type(found)) if found is None: @@ -233,31 +236,34 @@ def fetch_instances(domain: str, origin: str) -> list: doc = bs4.BeautifulSoup(response.text, "html.parser") logger.debug("doc[]='%s'", type(doc)) - headers = doc.findAll("div", {"class": "home-instances container-lg"}) - logger.debug("Checking %d headers ...", len(headers)) - for header in headers: - logger.debug("header[%s]='%s'", type(header), header) - - rows = header.find_next(["ul","table"]).findAll("a") - logger.debug("Found %d blocked instance(s) ...", len(rows)) - for tag in rows: - logger.debug("tag[]='%s'", type(tag)) - text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text - peer = tidyup.domain(text) - logger.debug("peer='%s'", peer) - - if peer == "": - logger.debug("peer is empty - SKIPPED!") - continue - elif not utils.is_domain_wanted(peer): - logger.debug("peer='%s' is not wanted - SKIPPED!", peer) - continue - elif peer in peers: - logger.debug("peer='%s' already added - SKIPPED!", peer) - continue - - logger.debug("Appending peer='%s' ...", peer) - peers.append(peer) + for container in [{"class": "home-instances container-lg"}, {"class": "container"}]: + logger.debug("container='%s'", container) + headers = doc.findAll("div", container) + + logger.debug("Checking %d headers ...", len(headers)) + for header in headers: + logger.debug("header[%s]='%s'", type(header), header) + + rows = header.find_next(["ul","table"]).findAll("a") + logger.debug("Found %d blocked instance(s) ...", len(rows)) + for tag in rows: + logger.debug("tag[]='%s'", type(tag)) + text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text + peer = tidyup.domain(text) + logger.debug("peer='%s'", peer) + + if peer == "": + logger.debug("peer is empty - SKIPPED!") + continue + elif not utils.is_domain_wanted(peer): + logger.debug("peer='%s' is not wanted - SKIPPED!", peer) + continue + elif peer in peers: + logger.debug("peer='%s' already added - SKIPPED!", peer) + continue + + logger.debug("Appending peer='%s' ...", peer) + peers.append(peer) logger.debug("peers()=%d", len(peers)) if len(peers) == 0: -- 2.39.5