# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
-import inspect
-import validators
+import logging
import bs4
+import validators
-from fba import blacklist
-from fba import blocks
-from fba import config
-from fba import fba
-from fba import federation
-from fba import instances
-from fba import network
+from fba import database
+from fba import utils
+from fba.helpers import blacklist
+from fba.helpers import config
+from fba.helpers import domain as domain_helper
from fba.helpers import tidyup
-def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
- # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
- if not isinstance(domain, str):
- raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
- elif domain == "":
- raise ValueError("Parameter 'domain' is empty")
- elif not isinstance(origin, str) and origin is not None:
- raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
- elif origin == "":
- raise ValueError("Parameter 'origin' is empty")
- elif not isinstance(nodeinfo_url, str):
- raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
- elif nodeinfo_url == "":
- raise ValueError("Parameter 'nodeinfo_url' is empty")
-
- # Blocks
+from fba.http import network
+from fba.http import nodeinfo
+
+from fba.models import blocks
+from fba.models import instances
+
# NOTE: configuring the root logger at import time is kept as-is because other
# modules may rely on this side effect.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Language mapping X -> English
#
# Keys are lower-cased heading texts; values are the block level identifiers
# that fetch_blocks_from_about() uses as keys of its result dict.
language_mapping = {
    # English -> English
    "filtered media": "filtered_media",
    "limited servers": "followers_only",
    "followers-only": "followers_only",
    "media removal": "media_removal",
    "media_removal": "media_removal",
    "media force-set as sensitive": "media_nsfw",
    "nsfw": "media_nsfw",
    "reject": "reject",
    "suspended servers": "reject",
    "silenced servers": "silenced",
    "removal from \"the whole known network\" timeline": "federated_timeline_removal",
}
+
def _update_block_reason(blockdict: list, blocked: str, reason) -> None:
    """Set `reason` on every record in `blockdict` whose "blocked" equals `blocked`."""
    logger.debug("Checking %d blockdict record(s) ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
        if block["blocked"] == blocked:
            logger.debug("Updating reason='%s' for blocked='%s'", reason, block["blocked"])
            block["reason"] = reason


def fetch_blocks(domain: str) -> list:
    """Collect the domain blocks an instance publishes in its nodeinfo.

    The nodeinfo document is fetched through nodeinfo.fetch() and its
    `metadata.federation` section is inspected:

    * `mrf_simple` (or, when that key is absent, `quarantined_instances`)
      provides the blocked domains per block level,
    * `mrf_simple_info` / `quarantined_instances_info` provide the reasons,
      which are attached to the records collected in the first step.

    When none of these keys is present, the blocks are taken from
    fetch_blocks_from_about() instead.

    Args:
        domain: Domain name of the blocking instance. It is validated with
            domain_helper.raise_on() and must be registered and not
            blacklisted.

    Returns:
        A list of dicts with the keys "blocker", "blocked", "reason" and
        "block_level". The list is empty when nodeinfo could not be fetched
        or lacks the expected structure.

    Raises:
        Exception: If `domain` is blacklisted or not registered.
        ValueError: If a reason in `mrf_simple_info` is neither a string,
            a dict with a "reason" key, nor None.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    # Init variables
    blockdict = list()
    rows = None

    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    logger.debug("rows[]='%s'", type(rows))
    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    # Becomes True as soon as any of the known JSON keys is present; when it
    # stays False the /about page is parsed as a fallback.
    found = False
    data = rows["metadata"]["federation"]
    logger.debug("data[]='%s'", type(data))

    # Blocks
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # NOTE(review): only 'mrf_simple' is iterated here; 'quarantined_instances'
        # is consulted only when 'mrf_simple' is absent - confirm this is intended.
        for block_level, blocklist in data["mrf_simple"].items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            # None must be rejected here as well, otherwise it would be
            # handed to blocks.alias_block_level() below.
            if block_level in [None, ""]:
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif validators.domain(blocked) and blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate(blocked, domain)
                logger.debug("blocked[%s]='%s' - DEOBFUSCATED!", type(blocked), blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is None or empty after utils.deobfuscate(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        logger.debug("Checking %d quarantined instance(s) ...", len(data["quarantined_instances"]))
        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level in [None, ""]:
                logger.warning("block_level='%s' is now empty!", block_level)
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked in [None, ""]:
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                _update_block_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined = data["quarantined_instances_info"]["quarantined_instances"]
        for raw_blocked, info in quarantined.items():
            logger.debug("blocked='%s' - BEFORE!", raw_blocked)

            # Validate the entry before reading its reason; a malformed entry
            # is skipped so the remaining reasons are still applied.
            if not isinstance(info, dict) or "reason" not in info:
                logger.warning("Cannot find 'reason' for blocked='%s' in rows()=%d,domain='%s' - SKIPPED!", raw_blocked, len(quarantined), domain)
                continue

            raw_reason = info["reason"]
            reason = tidyup.reason(raw_reason) if isinstance(raw_reason, str) and raw_reason != "" else None
            blocked = tidyup.domain(raw_blocked) if raw_blocked != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            _update_block_reason(blockdict, blocked, reason)

    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    logger.debug("found='%s'", found)
    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklist(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("Checking blocklist[%s]()=%d entries ...", block_level, len(blocklist[block_level]))
                for block in blocklist[block_level]:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...", domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
+
def fetch_blocks_from_about(domain: str) -> dict:
    """Parse the domain blocks listed on an instance's about page.

    The page "/instance/about/index.html" is fetched and parsed as HTML.
    Each `h2` heading is normalised with tidyup.reason(), lower-cased and
    translated through `language_mapping` into a block level. For a known
    block level the rows of the next `table` element (first row skipped as
    the header row) are read as (blocked domain, reason) pairs.

    Args:
        domain: Domain name of the blocking instance. It is validated with
            domain_helper.raise_on() and must be registered and not
            blacklisted.

    Returns:
        A dict mapping each supported block level to a list of dicts with
        the keys "blocked" and "reason". An empty dict is returned when the
        page could not be fetched or parsed.

    Raises:
        Exception: If `domain` is blacklisted or not registered.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    # Init variables
    doc = None

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        # An empty dict keeps len() == 0 for callers while honouring the
        # annotated return type.
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return {}

    headers = doc.find_all("h2")

    logger.debug("headers[]='%s'", type(headers))
    if headers is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return {}

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level not in blocklist:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))
            continue

        # find_next() searches everything after the header, not only its
        # siblings, so tables nested in e.g. a dropdown are found as well.
        # It returns None when no table follows.
        logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
        table = header.find_next("table")
        if table is None:
            logger.warning("domain='%s',block_level='%s': no table found after header - SKIPPED!", domain, block_level)
            continue

        # The first row is skipped as the table's header row
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) < 2:
                logger.debug("domain='%s',block_level='%s': row has only %d cell(s) - SKIPPED!", domain, block_level, len(cells))
                continue

            blocked = cells[0].text
            reason = cells[1].text

            logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason)
            blocked = tidyup.domain(blocked) if blocked != "" else None
            reason = tidyup.reason(reason) if reason != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked in [None, ""]:
                logger.debug("domain='%s',block_level='%s': blocked='%s' is empty - SKIPPED!", domain, block_level, blocked)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
            blocklist[block_level].append({
                "blocked": blocked,
                "reason" : reason,
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist