X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fba%2Fnetworks%2Fpleroma.py;h=4126999e8f08ea466980b4cfa3b5bd2cb1dc6f9a;hb=d56e4ed1d0090419c824e17f6220fc83d4e89fb8;hp=8c16f884e95629a5c734cdb3fd75b797d9da792b;hpb=d154a64084ce03b7f8d5fb62b1ee9e2fd9d3a75e;p=fba.git diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index 8c16f88..4126999 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -14,61 +14,97 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . -import inspect -import validators +import logging import bs4 +import validators -from fba import blacklist -from fba import blocks -from fba import config -from fba import fba -from fba import federation -from fba import instances -from fba import network +from fba import database +from fba import utils +from fba.helpers import blacklist +from fba.helpers import config +from fba.helpers import domain as domain_helper from fba.helpers import tidyup -def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): - # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - elif not isinstance(origin, str) and origin is not None: - raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'") - elif origin == "": - raise ValueError("Parameter 'origin' is empty") - elif not isinstance(nodeinfo_url, str): - raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'") - elif nodeinfo_url == "": - raise ValueError("Parameter 'nodeinfo_url' is empty") - - # Blocks +from fba.http import network +from fba.http import nodeinfo + +from fba.models import blocks +from fba.models import instances + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Language mapping X -> English +language_mapping = { + # English -> English + "filtered media" : "filtered_media", + "limited servers" : "followers_only", + "followers-only" : "followers_only", + "media removal" : "media_removal", + "media_removal" : "media_removal", + "media force-set as sensitive": "media_nsfw", + "nsfw" : "media_nsfw", + "reject" : "reject", + "suspended servers": "reject", + "silenced servers" : "silenced", + "removal from \"the whole known network\" timeline": "federated_timeline_removal", +} + +def fetch_blocks(domain: str) -> list: + logger.debug("domain='%s' - CALLED!", domain) + domain_helper.raise_on(domain) + + if blacklist.is_blacklisted(domain): + raise Exception(f"domain='{domain}' is blacklisted but function is invoked.") + elif not instances.is_registered(domain): + raise Exception(f"domain='{domain}' is not registered but function is invoked.") + + # Init variables blockdict = list() rows = None + try: - rows = federation.fetch_nodeinfo(domain, nodeinfo_url) + logger.debug("Fetching nodeinfo: domain='%s'", domain) + rows = nodeinfo.fetch(domain, update_mode=False) + + if "error_message" in rows: + logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain) + instances.set_last_error(domain, rows) + instances.update(domain) + + logger.debug("Returning empty list ... - EXIT!") + return list() + elif "exception" in rows: + logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain) + return list() + elif "json" in rows: + logger.debug("rows[json] found for domain='%s'", domain) + rows = rows["json"] + except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching nodeinfo") + logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain) + instances.set_last_error(domain, exception) + logger.debug("rows[]='%s'", type(rows)) if rows is None: - print("WARNING: Could not fetch nodeinfo from domain:", domain) - return + logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain) + return list() elif "metadata" not in rows: - print(f"WARNING: rows()={len(rows)} does not have key 'metadata', domain='{domain}'") - return + logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain) + return list() elif "federation" not in rows["metadata"]: - print(f"WARNING: rows()={len(rows['metadata'])} does not have key 'federation', domain='{domain}'") - return - - # DEBUG: print("DEBUG: Updating nodeinfo:", domain) - instances.update_last_nodeinfo(domain) + logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain) + return list() + found = False data = rows["metadata"]["federation"] + logger.debug("data[]='%s'", type(data)) if "mrf_simple" in data: - # DEBUG: print("DEBUG: Found mrf_simple:", domain) + logger.debug("Found mrf_simple in API response from domain='%s'", domain) + found = True for block_level, blocklist in ( { **data["mrf_simple"], @@ -77,151 +113,294 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): } } ).items(): - # DEBUG: print("DEBUG: block_level, blocklist():", block_level, len(blocklist)) - block_level = tidyup.domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) + logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist)) + block_level = tidyup.domain(block_level) if block_level != "" else None + logger.debug("block_level='%s' - AFTER!", block_level) if block_level == "": - print("WARNING: block_level is now empty!") + logger.warning("block_level is now empty!") + continue + elif block_level == "accept": + logger.debug("domain='%s' skipping block_level='accept'", domain) continue - # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',block_level='{block_level}' ...") - if len(blocklist) > 0: - for blocked in blocklist: - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = tidyup.domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) - continue - elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # Obsured domain name with no hash - # DEBUG: print(f"DEBUG: Trying to de-obscure blocked='{blocked}' ...") - fba.cursor.execute( - "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - - # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'") - if searchres is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{searchres[0]}'") - blocked = searchres[0] - nodeinfo_url = searchres[1] - - # DEBUG: print(f"DEBUG: blocked='{blocked}'") - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - skipped!") - continue - elif blocked.split(".")[-1] == "arpa": - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") - continue - elif not instances.is_registered(blocked): - # Commit changes - fba.connection.commit() - - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) - - if not blocks.is_instance_blocked(domain, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) - blocks.add_instance(domain, blocked, "unknown", block_level) - - if block_level == "reject": - # DEBUG: print("DEBUG: Adding to blockdict:", blocked) - blockdict.append({ - "blocked": blocked, - "reason" : None - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") - blocks.update_last_seen(domain, blocked, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() + block_level = blocks.alias_block_level(block_level) + + logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level) + for blocked in blocklist: + logger.debug("blocked='%s' - BEFORE!", blocked) + blocked = tidyup.domain(blocked) if blocked != "" else None + logger.debug("blocked='%s' - AFTER!", blocked) + + if blocked in [None, ""]: + logger.warning("blocked='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level) + continue + elif validators.domain(blocked) and blacklist.is_blacklisted(blocked): + logger.debug("blocked='%s' is blacklisted - SKIPPED!") + continue + + logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain) + blocked = utils.deobfuscate(blocked, domain) + logger.debug("blocked[%s]='%s' - DEOBFUSCATED!", type(blocked), blocked) + + if blocked in [None, ""]: + logger.warning("instance[host]='%s' is None or empty after tidyup.domain() - SKIPPED!", instance["host"]) + continue + elif not domain_helper.is_wanted(blocked): + logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked) + continue + + logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level) + blockdict.append({ + "blocker" : domain, + "blocked" : blocked, + "reason" : None, + "block_level": block_level, + }) + + elif "quarantined_instances" in data: + logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain) + found = True + block_level = "quarantined" + + logger.debug("Checking %d quarantined instance(s) ...", len(data["quarantined_instances"])) + for blocked in data["quarantined_instances"]: + logger.debug("blocked='%s' - BEFORE!", blocked) + blocked = tidyup.domain(blocked) if blocked != "" else None + logger.debug("blocked='%s' - AFTER!", blocked) + + if blocked in [None, ""]: + logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level) + continue + elif not domain_helper.is_wanted(blocked): + logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked) + continue + + logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level) + blockdict.append({ + "blocker" : domain, + "blocked" : blocked, + "reason" : None, + "block_level": block_level, + }) + + else: + logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain) + + logger.debug("Invoking commit() ...") + database.connection.commit() # Reasons if "mrf_simple_info" in data: - # DEBUG: print("DEBUG: Found mrf_simple_info:", domain) + logger.debug("Found mrf_simple_info in API response: domain='%s'", domain) + found = True for block_level, info in ( { **data["mrf_simple_info"], **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {}) } ).items(): - # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) - block_level = tidyup.domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) + logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items())) + block_level = tidyup.domain(block_level) if block_level != "" else None + logger.debug("block_level='%s' - AFTER!", block_level) - if block_level == "": - print("WARNING: block_level is now empty!") + if block_level in [None, ""]: + logger.warning("block_level='%s' is now empty!", block_level) + continue + elif block_level == "accept": + logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level) continue - # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from domain='{domain}',software='pleroma',block_level='{block_level}' ...") + block_level = blocks.alias_block_level(block_level) + + logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level) for blocked, reason in info.items(): - # DEBUG: print(f"DEBUG: blocked='{blocked}',reason[{type(reason)}]='{reason}' - BEFORE!") - blocked = tidyup.domain(blocked) + logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason) + blocked = tidyup.domain(blocked) if blocked != "" else None + logger.debug("blocked='%s' - AFTER!", blocked) if isinstance(reason, str): - # DEBUG: print("DEBUG: reason[] is a string") + logger.debug("reason[] is a string") reason = tidyup.reason(reason) elif isinstance(reason, dict) and "reason" in reason: - # DEBUG: print("DEBUG: reason[] is a dict") - reason = tidyup.reason(reason["reason"]) + logger.debug("reason[] is a dict") + reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None elif reason is not None: raise ValueError(f"Cannot handle reason[]='{type(reason)}'") - # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!") + logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason) if blocked == "": - print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) - continue - elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # Obsured domain with no hash - # DEBUG: print(f"DEBUG: Trying to de-obscure blocked='{blocked}' ...") - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - - # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'") - if searchres is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{searchres[0]}'") - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - skipped!") + logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level) continue - # DEBUG: print(f"DEBUG: blocked='{blocked}'") - if blocked.split(".")[-1] == "arpa": - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") - continue - elif not instances.is_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodein - instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) + logger.debug("Checking %d blockdict records ...", len(blockdict)) + for block in blockdict: + logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked) + if block["blocked"] == blocked: + logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"]) + block["reason"] = reason + + elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]: + logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain) + found = True + block_level = "quarantined" + + #print(data["quarantined_instances_info"]) + rows = data["quarantined_instances_info"]["quarantined_instances"] + for blocked in rows: + logger.debug("blocked='%s' - BEFORE!", blocked) + reason = tidyup.reason(rows[blocked]["reason"]) if rows[blocked]["reason"] != "" else None + blocked = tidyup.domain(blocked) if blocked != "" else None + logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason) + + if blocked not in rows or "reason" not in rows[blocked]: + logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(rows), domain) + break + elif blocked == "": + logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level) + continue - # DEBUG: print(f"DEBUG: Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'") - blocks.update_reason(reason, domain, blocked, block_level) + logger.debug("Checking %d blockdict record(s) ...", len(blockdict)) + for block in blockdict: + logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked) + if block["blocked"] == blocked: + logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"]) + block["reason"] = reason + else: + logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain) + + logger.debug("found='%s'", found) + if not found: + logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain) + blocklist = fetch_blocks_from_about(domain) + + logger.debug("blocklist()=%d", len(blocklist)) + if len(blocklist) > 0: + logger.info("Checking %d different blocklist(s) ...", len(blocklist)) + for block_level in blocklist: + logger.debug("Checking blocklist[%s]()=%d entries ...", block_level, blocklist[block_level]) + for block in blocklist[block_level]: + logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level) + blockdict.append({ + "blocker" : domain, + "blocked" : block["blocked"], + "reason" : block["reason"], + "block_level": block_level, + }) + + logger.debug("blockdict()=%d - EXIT!", len(blockdict)) + return blockdict + +def fetch_blocks_from_about(domain: str) -> dict: + logger.debug("domain='%s' - CALLED!", domain) + domain_helper.raise_on(domain) + + if blacklist.is_blacklisted(domain): + raise Exception(f"domain='{domain}' is blacklisted but function is invoked.") + elif not instances.is_registered(domain): + raise Exception(f"domain='{domain}' is not registered but function is invoked.") + + # Init variables + doc = None + + logger.debug("Fetching mastodon blocks from domain='%s'", domain) + for path in ["/instance/about/index.html"]: + try: + # Resetting doc type + doc = None + + logger.debug("Fetching path='%s' from domain='%s' ...", path, domain) + response = network.fetch_response( + domain, + path, + network.web_headers, + (config.get("connection_timeout"), config.get("read_timeout")) + ) + + logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) + if not response.ok or response.text.strip() == "": + logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain) + continue + + logger.debug("Parsing response.text()=%d Bytes ...", len(response.text)) + doc = bs4.BeautifulSoup( + response.text, + "html.parser", + ) + + logger.debug("doc[]='%s'", type(doc)) + if doc.find("h2") is not None: + logger.debug("Found 'h2' header in path='%s' - BREAK!", path) + break + + except network.exceptions as exception: + logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception)) + instances.set_last_error(domain, exception) + break + + blocklist = { + "reject" : [], + "filtered_media": [], + "followers_only": [], + "silenced" : [], + "media_nsfw" : [], + "media_removal" : [], + "federated_timeline_removal": [], + } + + logger.debug("doc[]='%s'", type(doc)) + if doc is None: + logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain) + return list() + + headers = doc.find_all("h2") + + logger.debug("headers[]='%s'", type(headers)) + if headers is None: + logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain) + return list() + + logger.info("Checking %d headers ...", len(headers)) + for header in headers: + logger.debug("header[%s]='%s'", type(header), header) + block_level = tidyup.reason(header.text).lower() + + logger.debug("block_level='%s' - BEFORE!", block_level) + if block_level in language_mapping: + logger.debug("block_level='%s' - FOUND!", block_level) + block_level = language_mapping[block_level].lower() + else: + logger.warning("block_level='%s' not found in language mapping table", block_level) + + logger.debug("block_level='%s - AFTER!'", block_level) + if block_level in blocklist: + # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu + logger.debug("Found block_level='%s', importing domain blocks ...", block_level) + for line in header.find_next("table").find_all("tr")[1:]: + logger.debug("line[]='%s'", type(line)) + blocked = line.find_all("td")[0].text + reason = line.find_all("td")[1].text + + logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason) + blocked = tidyup.domain(blocked) if blocked != "" else None + reason = tidyup.reason(reason) if reason != "" else None + logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason) + + if blocked in [None, ""]: + logger.debug("domain='%s',block_level='%s': blocked='%s' is empty - SKIPPED!", domain, block_level, blocked) + continue + elif not domain_helper.is_wanted(blocked): + logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked) + continue - # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}") - for entry in blockdict: - if entry["blocked"] == blocked: - # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'") - entry["reason"] = reason + logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason) + blocklist[block_level].append({ + "blocked": blocked, + "reason" : reason, + }) + else: + logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist)) - fba.connection.commit() - # DEBUG: print("DEBUG: EXIT!") + logger.debug("Returning blocklist for domain='%s' - EXIT!", domain) + return blocklist