X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fba%2Fnetworks%2Flemmy.py;h=bfe1e805f0eed78685d3fa7669de0bb966aca2ac;hb=9584067c49bcf277009aaf41f52c19f6425fd9ec;hp=238f673c7fc4945a408d5efb30eacd8f8599a393;hpb=33a04f9bd70c62bed93f63dd7f3c26b46f070df0;p=fba.git

diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py
index 238f673..bfe1e80 100644
--- a/fba/networks/lemmy.py
+++ b/fba/networks/lemmy.py
@@ -19,13 +19,12 @@
 import logging
 
 import bs4
 
-from fba import csrf
-from fba import utils
-
+from fba.helpers import blacklist
 from fba.helpers import config
 from fba.helpers import domain as domain_helper
 from fba.helpers import tidyup
 
+from fba.http import csrf
 from fba.http import federation
 from fba.http import network
@@ -35,10 +34,47 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 #logger.setLevel(logging.DEBUG)
 
+# Lemmy translations
+translations = [
+    "Blocked Instances".lower(),
+    "Instàncies bloquejades".lower(),
+    "Blocáilte Ásc".lower(),
+    "封锁实例".lower(),
+    "Blokované instance".lower(),
+    "Geblokkeerde instanties".lower(),
+    "Blockerade instanser".lower(),
+    "Instàncias blocadas".lower(),
+    "Istanze bloccate".lower(),
+    "Instances bloquées".lower(),
+    "Letiltott példányok".lower(),
+    "Instancias bloqueadas".lower(),
+    "Blokeatuta dauden instantziak".lower(),
+    "차단된 인스턴스".lower(),
+    "Peladen Yang Diblokir".lower(),
+    "Blokerede servere".lower(),
+    "Blokitaj nodoj".lower(),
+    "Блокирани Инстанции".lower(),
+    "Blockierte Instanzen".lower(),
+    "Estetyt instanssit".lower(),
+    "Instâncias bloqueadas".lower(),
+    "Zablokowane instancje".lower(),
+    "Blokované inštancie".lower(),
+    "المثلاء المحجوبون".lower(),
+    "Užblokuoti serveriai".lower(),
+    "ブロックしたインスタンス".lower(),
+    "Блокированные Инстансы".lower(),
+    "Αποκλεισμένοι διακομιστές".lower(),
+    "封鎖站台".lower(),
+    "Instâncias bloqueadas".lower(),
+]
+
 def fetch_peers(domain: str, origin: str) -> list:
     logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
     domain_helper.raise_on(domain)
 
+    if blacklist.is_blacklisted(domain):
+        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
+
     peers = list()
 
     # No CSRF by default, you don't have to add network.api_headers by yourself here
@@ -48,8 +84,10 @@ def fetch_peers(domain: str, origin: str) -> list:
         logger.debug("Checking CSRF for domain='%s'", domain)
         headers = csrf.determine(domain, dict())
     except network.exceptions as exception:
-        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
+        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
         instances.set_last_error(domain, exception)
+
+        logger.debug("Returning empty list ... - EXIT!")
- EXIT!") return list() try: @@ -83,47 +121,14 @@ def fetch_peers(domain: str, origin: str) -> list: logger.debug("peers()=%d - EXIT!", len(peers)) return peers -def fetch_blocks(domain: str, nodeinfo_url: str) -> list: - logger.debug("domain='%s,nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url) +def fetch_blocks(domain: str) -> list: + logger.debug("domain='%s - CALLED!", domain) domain_helper.raise_on(domain) - if not isinstance(nodeinfo_url, str): - raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'") - elif nodeinfo_url == "": - raise ValueError("Parameter 'nodeinfo_url' is empty") - - translations = [ - "Blocked Instances".lower(), - "Instàncies bloquejades".lower(), - "Blocáilte Ásc".lower(), - "封锁实例".lower(), - "Blokované instance".lower(), - "Geblokkeerde instanties".lower(), - "Blockerade instanser".lower(), - "Instàncias blocadas".lower(), - "Istanze bloccate".lower(), - "Instances bloquées".lower(), - "Letiltott példányok".lower(), - "Instancias bloqueadas".lower(), - "Blokeatuta dauden instantziak".lower(), - "차단된 인스턴스".lower(), - "Peladen Yang Diblokir".lower(), - "Blokerede servere".lower(), - "Blokitaj nodoj".lower(), - "Блокирани Инстанции".lower(), - "Blockierte Instanzen".lower(), - "Estetyt instanssit".lower(), - "Instâncias bloqueadas".lower(), - "Zablokowane instancje".lower(), - "Blokované inštancie".lower(), - "المثلاء المحجوبون".lower(), - "Užblokuoti serveriai".lower(), - "ブロックしたインスタンス".lower(), - "Блокированные Инстансы".lower(), - "Αποκλεισμένοι διακομιστές".lower(), - "封鎖站台".lower(), - "Instâncias bloqueadas".lower(), - ] + if blacklist.is_blacklisted(domain): + raise Exception(f"domain='{domain}' is blacklisted but function is invoked.") + elif not instances.is_registered(domain): + raise Exception(f"domain='{domain}' is not registered but function is invoked.") blocklist = list() @@ -138,38 +143,48 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list: ) logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) - if response.ok and response.status_code < 300 and response.text != "": + if response.ok and response.status_code == 200 and response.text != "": logger.debug("Parsing %s Bytes ...", len(response.text)) doc = bs4.BeautifulSoup(response.text, "html.parser") logger.debug("doc[]='%s'", type(doc)) found = None - for container in [{"class": "home-instances container-lg"}, {"class": "container"}]: - logger.debug("container='%s'", container) - headers = doc.findAll("div", container) - - logger.debug("Checking %d header(s) ...", len(headers)) - for header in headers: - logger.debug("header[]='%s'", type(header)) - content = header.find(["h2", "h3", "h4", "h5"]) - - logger.debug("content[%s]='%s' - BEFORE!", type(content), content) - if content is not None: - content = content.contents[0] - logger.debug("content[%s]='%s' - AFTER!", type(content), content) - - if content is None: - logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header) - continue - elif not isinstance(content, str): - logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content)) - continue - elif content.lower() in translations: - logger.debug("Found header with blocked instances - BREAK!") - found = header + for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]: + logger.debug("criteria='%s'", criteria) + containers = doc.findAll("div", criteria) + + logger.debug("Checking %d containers 
...", len(containers)) + for container in containers: + logger.debug("container[]='%s'", type(container)) + for header in container.find_all(["h2", "h3", "h4", "h5"]): + content = header + logger.debug("header[%s]='%s' - BEFORE!", type(header), header) + if header is not None: + content = str(header.contents[0]) + logger.debug("content[%s]='%s' - AFTER!", type(content), content) + + if content is None or content == "": + logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header) + continue + elif not isinstance(content, str): + logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content)) + continue + elif content.lower() in translations: + logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header) + found = header + break + + logger.debug("found[]='%s'", type(found)) + if found is not None: + logger.debug("Found header with blocked instances - BREAK(2) !") break + logger.debug("found[]='%s'", type(found)) + if found is not None: + logger.debug("Found header with blocked instances - BREAK(1) !") + break + logger.debug("found[]='%s'", type(found)) if found is None: logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain) @@ -188,17 +203,17 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list: logger.debug("blocklist()=%d - EXIT!", len(blocklist)) return blocklist - blocking = found.find_next(["ul","table"]).findAll("a") + blocking = found.find_next(["ul", "table"]).findAll("a") logger.debug("Found %d blocked instance(s) ...", len(blocking)) for tag in blocking: logger.debug("tag[]='%s'", type(tag)) - blocked = tidyup.domain(tag.contents[0]) + blocked = tidyup.domain(tag.contents[0]) if tag.contents[0] != "" else None logger.debug("blocked='%s'", blocked) - if blocked == "": + if blocked is None or blocked == "": logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0]) continue - elif not utils.is_domain_wanted(blocked): + elif not domain_helper.is_wanted(blocked): logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked) continue @@ -209,6 +224,9 @@ def fetch_blocks(domain: str, nodeinfo_url: str) -> list: "reason" : None, "block_level": "reject", }) + else: + logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason) + instances.set_last_error(domain, response) except network.exceptions as exception: logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception)) @@ -221,6 +239,9 @@ def fetch_instances(domain: str, origin: str) -> list: logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin) domain_helper.raise_on(domain) + if blacklist.is_blacklisted(domain): + raise Exception(f"domain='{domain}' is blacklisted but function is invoked.") + peers = list() try: @@ -234,32 +255,34 @@ def fetch_instances(domain: str, origin: str) -> list: ) logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) - if response.ok and response.status_code < 300 and response.text != "": + if response.ok and response.status_code == 200 and response.text != "": logger.debug("Parsing %s Bytes ...", len(response.text)) doc = bs4.BeautifulSoup(response.text, "html.parser") logger.debug("doc[]='%s'", type(doc)) - for container in [{"class": "home-instances container-lg"}, {"class": "container"}]: - logger.debug("container='%s'", container) - headers = 
+            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+                logger.debug("criteria='%s'", criteria)
+                containers = doc.findAll("div", criteria)
 
-                logger.debug("Checking %d headers ...", len(headers))
-                for header in headers:
+                logger.debug("Checking %d containers ...", len(containers))
+                for header in containers:
                     logger.debug("header[%s]='%s'", type(header), header)
                     rows = header.find_next(["ul","table"]).findAll("a")
 
-                    logger.debug("Found %d blocked instance(s) ...", len(rows))
+                    logger.debug("Found %d instance(s) ...", len(rows))
                     for tag in rows:
                         logger.debug("tag[]='%s'", type(tag))
                         text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
-                        peer = tidyup.domain(text)
-                        logger.debug("peer='%s'", peer)
+                        logger.debug("text='%s' - BEFORE!", text)
+
+                        peer = tidyup.domain(text) if text != "" else None
+                        logger.debug("peer='%s' - AFTER", peer)
 
-                        if peer == "":
-                            logger.debug("peer is empty - SKIPPED!")
+                        if peer is None or peer == "":
+                            logger.warning("peer='%s' is empty, text='%s' - SKIPPED!", peer, text)
                             continue
-                        elif not utils.is_domain_wanted(peer):
+                        elif not domain_helper.is_wanted(peer):
                             logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                             continue
                         elif peer in peers:
@@ -273,6 +296,9 @@
             if len(peers) == 0:
                 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                 peers = parse_script(doc)
+        else:
+            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
+            instances.set_last_error(domain, response)
 
     logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
     instances.set_success(domain)
@@ -286,6 +312,7 @@
 
 def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
     logger.debug("doc[]='%s',only='%s' - CALLED!")
+
     if not isinstance(doc, bs4.BeautifulSoup):
         raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'")
     elif not isinstance(only, str) and only is not None:
@@ -315,7 +342,7 @@
     try:
         parsed = json.loads(iso_data)
     except json.decoder.JSONDecodeError as exception:
-        logger.warning("Exception '%s' during parsing %d Bytes: '%s'", type(exception), len(iso_data), str(exception))
+        logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
         return list()
 
     logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
@@ -352,12 +379,12 @@
         peer = tidyup.domain(row["domain"])
         logger.debug("peer='%s' - AFTER!", peer)
 
-        if peer == "":
-            logger.debug("peer is empty - SKIPPED!")
+        if peer is None or peer == "":
+            logger.warning("peer='%s' is empty, row[domain]='%s' - SKIPPED!", peer, row["domain"])
             continue
-        elif not utils.is_domain_wanted(peer):
+        elif not domain_helper.is_wanted(peer):
             logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
             continue
         elif peer in peers:
             logger.debug("peer='%s' already added - SKIPPED!", peer)
             continue
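
The scraping technique this diff settles on can be read in isolation: find a candidate div container, look for an h2-h5 heading whose text matches one of the localized "Blocked Instances" strings, then collect the domains from the <a> tags in the <ul> or <table> that follows the heading. Below is a minimal, self-contained sketch of that approach. It assumes only that bs4 is installed; the HTML sample, the shortened TRANSLATIONS list, and the parse_blocked() helper are illustrative stand-ins and are not part of fba.

import bs4

# Lowercased, localized "Blocked Instances" headings (shortened here; the
# module above carries roughly 30 of these).
TRANSLATIONS = ["blocked instances", "blockierte instanzen", "instances bloquées"]

# Hypothetical extract of a Lemmy /instances page.
HTML = """
<div class="home-instances container-lg">
  <h5>Blockierte Instanzen</h5>
  <ul>
    <li><a href="https://bad.example">bad.example</a></li>
    <li><a href="https://worse.example">worse.example</a></li>
  </ul>
</div>
"""

def parse_blocked(html: str) -> list:
    doc = bs4.BeautifulSoup(html, "html.parser")

    found = None
    # Same candidate containers as fetch_blocks() above.
    for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
        for container in doc.find_all("div", criteria):
            for header in container.find_all(["h2", "h3", "h4", "h5"]):
                # First child of the heading is its text node, if any.
                content = str(header.contents[0]) if len(header.contents) > 0 else ""
                if content.lower() in TRANSLATIONS:
                    found = header
                    break
            if found is not None:
                break
        if found is not None:
            break

    if found is None:
        return []

    # The blocked domains are the anchor texts in the <ul>/<table>
    # immediately following the matched heading.
    return [str(tag.contents[0]) for tag in found.find_next(["ul", "table"]).find_all("a")]

print(parse_blocked(HTML))  # ['bad.example', 'worse.example']

Matching on heading text is also why the module-level translations list exists at all: Lemmy renders the section title in the instance's configured UI language, so the scraper has to recognize every known translation of "Blocked Instances" rather than rely on a stable CSS selector.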