X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fba%2Fnetworks%2Flemmy.py;h=7b427fc91ca05db6a85238be3015bb2b76b41294;hb=bd36d58fadcf377a7982103ca4d7e5c4376ef463;hp=18cf2c474a5c5a35a0e4b91cc1608413a1d23087;hpb=9c7760440dc866b2f75bac29d12e6d57c62f9f0c;p=fba.git diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py index 18cf2c4..7b427fc 100644 --- a/fba/networks/lemmy.py +++ b/fba/networks/lemmy.py @@ -20,7 +20,6 @@ import logging import bs4 from fba import csrf -from fba import utils from fba.helpers import config from fba.helpers import domain as domain_helper @@ -48,8 +47,10 @@ def fetch_peers(domain: str, origin: str) -> list: logger.debug("Checking CSRF for domain='%s'", domain) headers = csrf.determine(domain, dict()) except network.exceptions as exception: - logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__) + logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__) instances.set_last_error(domain, exception) + + logger.debug("Returning empty list ... - EXIT!") return list() try: @@ -145,27 +146,41 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list: logger.debug("doc[]='%s'", type(doc)) found = None - for container in [{"class": "home-instances container-lg"}, {"class": "container"}]: - logger.debug("container='%s'", container) - headers = doc.findAll("div", container) - - logger.debug("Checking %d header(s) ...", len(headers)) - for header in headers: - logger.debug("header[]='%s'", type(header)) - content = header.find(["h2", "h3", "h4", "h5"]).contents[0] - - logger.debug("content[%s]='%s'", type(content), content) - if content is None: - logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header) - continue - elif not isinstance(content, str): - logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content)) - continue - elif content.lower() in translations: - logger.debug("Found header with blocked instances - BREAK!") - found = header + for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]: + logger.debug("criteria='%s'", criteria) + containers = doc.findAll("div", criteria) + + logger.debug("Checking %d containers ...", len(containers)) + for container in containers: + logger.debug("container[]='%s'", type(container)) + for header in container.find_all(["h2", "h3", "h4", "h5"]): + content = header + logger.debug("header[%s]='%s' - BEFORE!", type(header), header) + if header is not None: + content = str(header.contents[0]) + logger.debug("content[%s]='%s' - AFTER!", type(content), content) + + if content is None: + logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header) + continue + elif not isinstance(content, str): + logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content)) + continue + elif content.lower() in translations: + logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header) + found = header + break + + logger.debug("found[]='%s'", type(found)) + if found is not None: + logger.debug("Found header with blocked instances - BREAK(2) !") break + logger.debug("found[]='%s'", type(found)) + if found is not None: + logger.debug("Found header with blocked instances - BREAK(1) !") + break + logger.debug("found[]='%s'", type(found)) if found is None: logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain) @@ -184,7 +199,7 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list: logger.debug("blocklist()=%d - EXIT!", len(blocklist)) return blocklist - blocking = found.find_next(["ul","table"]).findAll("a") + blocking = found.find_next(["ul", "table"]).findAll("a") logger.debug("Found %d blocked instance(s) ...", len(blocking)) for tag in blocking: logger.debug("tag[]='%s'", type(tag)) @@ -194,7 +209,7 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list: if blocked == "": logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0]) continue - elif not utils.is_domain_wanted(blocked): + elif not domain_helper.is_wanted(blocked): logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked) continue @@ -236,16 +251,16 @@ def fetch_instances(domain: str, origin: str) -> list: doc = bs4.BeautifulSoup(response.text, "html.parser") logger.debug("doc[]='%s'", type(doc)) - for container in [{"class": "home-instances container-lg"}, {"class": "container"}]: - logger.debug("container='%s'", container) - headers = doc.findAll("div", container) + for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]: + logger.debug("criteria='%s'", criteria) + containers = doc.findAll("div", criteria) - logger.debug("Checking %d headers ...", len(headers)) - for header in headers: + logger.debug("Checking %d containers ...", len(containers)) + for header in containers: logger.debug("header[%s]='%s'", type(header), header) rows = header.find_next(["ul","table"]).findAll("a") - logger.debug("Found %d blocked instance(s) ...", len(rows)) + logger.debug("Found %d instance(s) ...", len(rows)) for tag in rows: logger.debug("tag[]='%s'", type(tag)) text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text @@ -255,7 +270,7 @@ def fetch_instances(domain: str, origin: str) -> list: if peer == "": logger.debug("peer is empty - SKIPPED!") continue - elif not utils.is_domain_wanted(peer): + elif not domain_helper.is_wanted(peer): logger.debug("peer='%s' is not wanted - SKIPPED!", peer) continue elif peer in peers: @@ -284,7 +299,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list: logger.debug("doc[]='%s',only='%s' - CALLED!") if not isinstance(doc, bs4.BeautifulSoup): raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'") - elif not isinstance(only, str) and only != None: + elif not isinstance(only, str) and only is not None: raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'") elif isinstance(only, str) and only == "": raise ValueError("Parameter 'only' is empty") @@ -304,14 +319,14 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list: logger.debug("script.contents[0][]='%s'", type(script.contents[0])) - isoData = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"") - logger.debug("isoData[%s]='%s'", type(isoData), isoData) + iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"") + logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data) parsed = None try: - parsed = json.loads(isoData) + parsed = json.loads(iso_data) except json.decoder.JSONDecodeError as exception: - logger.warning("Exception '%s' during parsing %d Bytes: '%s'", type(exception), len(isoData), str(exception)) + logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception)) return list() logger.debug("parsed[%s]()=%d", type(parsed), len(parsed)) @@ -351,7 +366,7 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list: if peer == "": logger.debug("peer is empty - SKIPPED!") continue - elif not utils.is_domain_wanted(peer): + elif not domain_helper.is_wanted(peer): logger.debug("peer='%s' is not wanted - SKIPPED!", peer) continue elif peer in peers: