From: Roland Häder Date: Mon, 12 Jun 2023 22:01:25 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=d30a841e8f6a7217da892b0771add5d7b4ffafb2;p=fba.git Continued: - moved blocks, instances to new package fba.models as these modules are only wrappers around a database table with the same name - moved+renamed fba.log_error() to fba.models.error_log.add() - pleroma has a /instance/about/index.html "API" which can be parsed for block lists if the JSON has returned no 'mrf_simple' and 'quarantined_instances' --- diff --git a/fba/__init__.py b/fba/__init__.py index 0230fa3..ceb63ed 100644 --- a/fba/__init__.py +++ b/fba/__init__.py @@ -15,7 +15,6 @@ __all__ = [ 'blacklist', - 'blocks', 'boot', 'commands', 'config', @@ -23,8 +22,8 @@ __all__ = [ 'federation', 'fba', 'helpers', - 'instances', 'locking', + 'model', 'network', 'networks', ] diff --git a/fba/blocks.py b/fba/blocks.py deleted file mode 100644 index d9ab70d..0000000 --- a/fba/blocks.py +++ /dev/null @@ -1,176 +0,0 @@ -# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -import sys -import time -import validators - -from fba import blacklist -from fba import fba -from fba.helpers import tidyup - -def update_reason(reason: str, blocker: str, blocked: str, block_level: str): - # DEBUG: print(f"DEBUG: reason='{reason}',blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!") - if not isinstance(reason, str) and reason is not None: - raise ValueError(f"Parameter reason[]='{type(reason)}' is not 'str'") - elif not isinstance(blocker, str): - raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'") - elif blocker == "": - raise ValueError("Parameter 'blocker' is empty") - elif not isinstance(blocked, str): - raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not 'str'") - elif blocked == "": - raise ValueError("Parameter 'blocked' is empty") - elif not isinstance(block_level, str): - raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not 'str'") - elif block_level == "": - raise ValueError("Parameter 'block_level' is empty") - - # DEBUG: print("DEBUG: Updating block reason:", reason, blocker, blocked, block_level) - try: - fba.cursor.execute( - "UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? 
AND (reason IS NULL OR reason = '') LIMIT 1", - ( - reason, - time.time(), - blocker, - blocked, - block_level - ), - ) - - # DEBUG: print(f"DEBUG: fba.cursor.rowcount={fba.cursor.rowcount}") - if fba.cursor.rowcount == 0: - # DEBUG: print(f"DEBUG: Did not update any rows: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',reason='{reason}' - EXIT!") - return - - except BaseException as exception: - print(f"ERROR: failed SQL query: reason='{reason}',blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(exception)}]:'{str(exception)}'") - sys.exit(255) - - # DEBUG: print("DEBUG: EXIT!") - -def update_last_seen(blocker: str, blocked: str, block_level: str): - # DEBUG: print("DEBUG: Updating last_seen for:", blocker, blocked, block_level) - if not isinstance(blocker, str): - raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'") - elif blocker == "": - raise ValueError("Parameter 'blocker' is empty") - elif not isinstance(blocked, str): - raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not 'str'") - elif blocked == "": - raise ValueError("Parameter 'blocked' is empty") - elif not isinstance(block_level, str): - raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not 'str'") - elif block_level == "": - raise ValueError("Parameter 'block_level' is empty") - - try: - fba.cursor.execute( - "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? 
LIMIT 1", - ( - time.time(), - blocker, - blocked, - block_level - ) - ) - - # DEBUG: print(f"DEBUG: fba.cursor.rowcount={fba.cursor.rowcount}") - if fba.cursor.rowcount == 0: - # DEBUG: print(f"DEBUG: Did not update any rows: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}' - EXIT!") - return - - except BaseException as exception: - print(f"ERROR: failed SQL query: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(exception)}]:'{str(exception)}'") - sys.exit(255) - - # DEBUG: print("DEBUG: EXIT!") - -def is_instance_blocked(blocker: str, blocked: str, block_level: str) -> bool: - # DEBUG: print(f"DEBUG: blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!") - if not isinstance(blocker, str): - raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not of type 'str'") - elif blocker == "": - raise ValueError("Parameter 'blocker' is empty") - elif not isinstance(blocked, str): - raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not of type 'str'") - elif blocked == "": - raise ValueError("Parameter 'blocked' is empty") - elif not isinstance(block_level, str): - raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not of type 'str'") - elif block_level == "": - raise ValueError("Parameter 'block_level' is empty") - - fba.cursor.execute( - "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? 
LIMIT 1", - ( - blocker, - blocked, - block_level - ), - ) - - is_blocked = fba.cursor.fetchone() is not None - - # DEBUG: print(f"DEBUG: is_blocked='{is_blocked}' - EXIT!") - return is_blocked - -def add_instance(blocker: str, blocked: str, reason: str, block_level: str): - # DEBUG: print("DEBUG: blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level) - if not isinstance(blocker, str): - raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'") - elif blocker == "": - raise ValueError("Parameter 'blocker' is empty") - elif not validators.domain(blocker.split("/")[0]): - raise ValueError(f"Bad blocker='{blocker}'") - elif not isinstance(blocked, str): - raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not 'str'") - elif blocked == "": - raise ValueError("Parameter 'blocked' is empty") - elif not isinstance(block_level, str): - raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not of type 'str'") - elif block_level == "": - raise ValueError("Parameter 'block_level' is empty") - elif not validators.domain(blocked.split("/")[0]): - raise ValueError(f"Bad blocked='{blocked}'") - elif blacklist.is_blacklisted(blocker): - raise Exception(f"blocker='{blocker}' is blacklisted but function invoked") - elif blacklist.is_blacklisted(blocked): - raise Exception(f"blocked='{blocked}' is blacklisted but function invoked") - - if reason is not None: - # Maybe needs cleaning - reason = tidyup.reason(reason) - - print(f"INFO: New block: blocker='{blocker}',blocked='{blocked}', reason='{reason}', block_level='{block_level}'") - try: - fba.cursor.execute( - "INSERT INTO blocks (blocker, blocked, reason, block_level, first_seen, last_seen) VALUES(?, ?, ?, ?, ?, ?)", - ( - blocker, - blocked, - reason, - block_level, - time.time(), - time.time() - ), - ) - except BaseException as exception: - print(f"ERROR: failed SQL query: 
blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}',exception[{type(exception)}]:'{str(exception)}'") - sys.exit(255) - - # DEBUG: print("DEBUG: EXIT!") diff --git a/fba/commands.py b/fba/commands.py index 2c50c68..ad5b7ca 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -27,16 +27,17 @@ import reqto import validators from fba import blacklist -from fba import blocks from fba import config from fba import federation from fba import fba -from fba import instances from fba import locking from fba import network from fba.helpers import tidyup +from fba.models import blocks +from fba.models import instances + from fba.networks import friendica from fba.networks import mastodon from fba.networks import misskey diff --git a/fba/fba.py b/fba/fba.py index 6273e45..3cfc925 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -14,9 +14,7 @@ # along with this program. If not, see . import hashlib -import json import sqlite3 -import time from urllib.parse import urlparse @@ -24,11 +22,11 @@ import requests import validators from fba import blacklist -from fba import config from fba import federation -from fba import instances from fba import network +from fba.models import instances + # Connect to database connection = sqlite3.connect("blocks.db") cursor = connection.cursor() @@ -47,41 +45,6 @@ def get_hash(domain: str) -> str: return hashlib.sha256(domain.encode("utf-8")).hexdigest() -def log_error(domain: str, error: dict): - # DEBUG: print("DEBUG: domain,error[]:", domain, type(error)) - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - elif config.get("write_error_log").lower() != "true": - # DEBUG: print(f"DEBUG: Writing to error_log is disabled in configuruation file - EXIT!") - return - - # DEBUG: print("DEBUG: BEFORE error[]:", type(error)) - if isinstance(error, BaseException, error, json.decoder.JSONDecodeError): 
- error = f"error[{type(error)}]='{str(error)}'" - - # DEBUG: print("DEBUG: AFTER error[]:", type(error)) - if isinstance(error, str): - cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[ - domain, - error, - time.time() - ]) - else: - cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[ - domain, - error["status_code"], - error["error_message"], - time.time() - ]) - - # Cleanup old entries - # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})") - cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")]) - - # DEBUG: print("DEBUG: EXIT!") - def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Response: # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!") if not isinstance(url, str): diff --git a/fba/federation.py b/fba/federation.py index 54157b6..9e0922d 100644 --- a/fba/federation.py +++ b/fba/federation.py @@ -19,12 +19,13 @@ import validators from fba import blacklist from fba import config from fba import csrf -from fba import instances from fba import network from fba.helpers import tidyup from fba.helpers import version +from fba.models import instances + from fba.networks import lemmy from fba.networks import misskey from fba.networks import peertube diff --git a/fba/helpers/tidyup.py b/fba/helpers/tidyup.py index b784eeb..32f3e9e 100644 --- a/fba/helpers/tidyup.py +++ b/fba/helpers/tidyup.py @@ -23,9 +23,6 @@ def reason(string: str) -> str: # Strip string string = string.strip() - # Replace â with " - string = re.sub("â", "\"", string) - # DEBUG: print(f"DEBUG: string='{string}' - EXIT!") return string diff --git a/fba/instances.py b/fba/instances.py deleted file mode 100644 index a6109df..0000000 --- a/fba/instances.py +++ /dev/null @@ -1,377 +0,0 @@ -# Fedi API Block - An aggregator for fetching 
blocking data from fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import json -import sys -import time - -import requests -import validators - -from fba import blacklist -from fba import config -from fba import fba -from fba import federation -from fba import network - -from fba.helpers import cache - -# Found info from node, such as nodeinfo URL, detection mode that needs to be -# written to database. 
Both arrays must be filled at the same time or else -# update_data() will fail -_pending = { - # Detection mode: 'AUTO_DISCOVERY', 'STATIC_CHECKS' or 'GENERATOR' - # NULL means all detection methods have failed (maybe still reachable instance) - "detection_mode" : {}, - # Found nodeinfo URL - "nodeinfo_url" : {}, - # Found total peers - "total_peers" : {}, - # Last fetched instances - "last_instance_fetch": {}, - # Last updated - "last_updated" : {}, - # Last blocked - "last_blocked" : {}, - # Last nodeinfo (fetched) - "last_nodeinfo" : {}, - # Last status code - "last_status_code" : {}, - # Last error details - "last_error_details" : {}, -} - -def set_data(key: str, domain: str, value: any): - # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!") - if not isinstance(key, str): - raise ValueError("Parameter key[]='{type(key)}' is not 'str'") - elif key == "": - raise ValueError("Parameter 'key' is empty") - elif not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - elif not key in _pending: - raise ValueError(f"key='{key}' not found in _pending") - elif not fba.is_primitive(value): - raise ValueError(f"value[]='{type(value)}' is not a primitive type") - - # Set it - _pending[key][domain] = value - - # DEBUG: print("DEBUG: EXIT!") - -def has_pending(domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - - has = False - for key in _pending: - # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',_pending[key]()='{len(_pending[key])}'") - if domain in _pending[key]: - has = True - break - - # DEBUG: print(f"DEBUG: has='{has}' - EXIT!") - return has - -def update_data(domain: str): - # DEBUG: print(f"DEBUG: 
domain='{domain}' - CALLED!") - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - elif not has_pending(domain): - raise Exception(f"Domain '{domain}' has no pending instance data, but function invoked") - - # DEBUG: print(f"DEBUG: Updating instance data for domain='{domain}' ...") - sql_string = "" - fields = list() - for key in _pending: - # DEBUG: print("DEBUG: key:", key) - if domain in _pending[key]: - # DEBUG: print(f"DEBUG: Adding '{_pending[key][domain]}' for key='{key}' ...") - fields.append(_pending[key][domain]) - sql_string += f" {key} = ?," - - fields.append(time.time()) - fields.append(domain) - - if sql_string == "": - raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'") - - # DEBUG: print(f"DEBUG: sql_string='{sql_string}',fields()={len(fields)}") - sql_string = "UPDATE instances SET" + sql_string + " last_updated = ? WHERE domain = ? LIMIT 1" - # DEBUG: print("DEBUG: sql_string:", sql_string) - - try: - # DEBUG: print("DEBUG: Executing SQL:", sql_string) - fba.cursor.execute(sql_string, fields) - - # DEBUG: print(f"DEBUG: Success! 
(rowcount={fba.cursor.rowcount })") - if fba.cursor.rowcount == 0: - # DEBUG: print(f"DEBUG: Did not update any rows: domain='{domain}',fields()={len(fields)} - EXIT!") - return - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - - # DEBUG: print(f"DEBUG: Deleting _pending for domain='{domain}'") - for key in _pending: - # DEBUG: print(f"DEBUG: domain='{domain}',key='{key}'") - if domain in _pending[key]: - del _pending[key][domain] - - except BaseException as exception: - print(f"ERROR: failed SQL query: domain='{domain}',sql_string='{sql_string}',exception[{type(exception)}]:'{str(exception)}'") - sys.exit(255) - - # DEBUG: print("DEBUG: EXIT!") - -def update_last_instance_fetch(domain: str): - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - - # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain) - set_data("last_instance_fetch", domain, time.time()) - - # Running pending updated - # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") - update_data(domain) - - # DEBUG: print("DEBUG: EXIT!") - -def update_last_blocked(domain: str): - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - - # DEBUG: print("DEBUG: Updating last_blocked for domain", domain) - set_data("last_blocked", domain, time.time()) - - # Running pending updated - # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") - update_data(domain) - - # DEBUG: print("DEBUG: EXIT!") - -def add(domain: str, origin: str, command: str, path: str = None): - # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',command='{command}',path='{path}' - CALLED!") - if not isinstance(domain, 
str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - elif not isinstance(origin, str) and origin is not None: - raise ValueError(f"origin[]='{type(origin)}' is not 'str'") - elif origin == "": - raise ValueError("Parameter 'origin' is empty") - elif not isinstance(command, str): - raise ValueError(f"command[]='{type(command)}' is not 'str'") - elif command == "": - raise ValueError("Parameter 'command' is empty") - elif not validators.domain(domain.split("/")[0]): - raise ValueError(f"Bad domain name='{domain}'") - elif domain.endswith(".arpa"): - raise ValueError(f"Please don't crawl .arpa domains: domain='{domain}'") - elif origin is not None and not validators.domain(origin.split("/")[0]): - raise ValueError(f"Bad origin name='{origin}'") - elif blacklist.is_blacklisted(domain): - raise Exception(f"domain='{domain}' is blacklisted, but method invoked") - elif domain.find("/profile/") > 0 or domain.find("/users/") > 0: - raise Exception(f"domain='{domain}' is a single user") - - software = None - try: - # DEBUG: print("DEBUG: domain,origin,command,path:", domain, origin, command, path) - software = federation.determine_software(domain, path) - except network.exceptions as exception: - print(f"WARNING Exception '{type(exception)}' during determining software type") - - # DEBUG: print("DEBUG: Determined software:", software) - if software == "lemmy" and domain.find("/c/") > 0: - domain = domain.split("/c/")[0] - if is_registered(domain): - print(f"WARNING: domain='{domain}' already registered after cutting off user part. 
- EXIT!") - return - - print(f"INFO: Adding instance domain='{domain}' (origin='{origin}',software='{software}')") - fba.cursor.execute( - "INSERT INTO instances (domain, origin, command, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)", - ( - domain, - origin, - command, - fba.get_hash(domain), - software, - time.time() - ), - ) - - cache.set_sub_key("is_registered", domain, True) - - if has_pending(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...") - set_data("last_status_code" , domain, None) - set_data("last_error_details", domain, None) - update_data(domain) - - # DEBUG: print(f"DEBUG: Updating nodeinfo for domain='{domain}'") - update_last_nodeinfo(domain) - - # DEBUG: print("DEBUG: EXIT!") - -def update_last_nodeinfo(domain: str): - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - - # DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain) - set_data("last_nodeinfo", domain, time.time()) - set_data("last_updated" , domain, time.time()) - - # Running pending updated - # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") - update_data(domain) - - # DEBUG: print("DEBUG: EXIT!") - -def update_last_error(domain: str, error: dict): - # DEBUG: print("DEBUG: domain,error[]:", domain, type(error)) - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - - # DEBUG: print("DEBUG: BEFORE error[]:", type(error)) - if isinstance(error, (BaseException, json.decoder.JSONDecodeError)): - error = f"error[{type(error)}]='{str(error)}'" - # DEBUG: print("DEBUG: AFTER error[]:", type(error)) - - if isinstance(error, str): - # DEBUG: print(f"DEBUG: Setting last_error_details='{error}'") - 
set_data("last_status_code" , domain, 999) - set_data("last_error_details", domain, error if error != "" else None) - elif isinstance(error, requests.models.Response): - # DEBUG: print(f"DEBUG: Setting last_error_details='{error.reason}'") - set_data("last_status_code" , domain, error.status_code) - set_data("last_error_details", domain, error.reason if error.reason != "" else None) - elif not isinstance(error, dict): - raise KeyError(f"Cannot handle keys in error[{type(error)}]='{error}'") - elif "status_code" in error and "error_message" in error: - # DEBUG: print(f"DEBUG: Setting last_error_details='{error['error_message']}'") - set_data("last_status_code" , domain, error["status_code"]) - set_data("last_error_details", domain, error["error_message"] if error["error_message"] != "" else None) - elif "json" in error and "error" in error["json"]: - set_data("last_status_code" , domain, error["status_code"]) - set_data("last_error_details", domain, error["json"]["error"] if error["json"]["error"] != "" else None) - - # Running pending updated - # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") - update_data(domain) - - fba.log_error(domain, error) - - # DEBUG: print("DEBUG: EXIT!") - -def is_registered(domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if not cache.key_exists("is_registered"): - # DEBUG: print("DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...") - fba.cursor.execute("SELECT domain FROM instances") - - # Check Set all - cache.set_all("is_registered", fba.cursor.fetchall(), True) - - # Is cache found? 
- registered = cache.sub_key_exists("is_registered", domain) - - # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!") - return registered - -def is_recent(domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - elif not is_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is not registered, returning False - EXIT!") - return False - - # Query database - fba.cursor.execute("SELECT last_instance_fetch FROM instances WHERE domain = ? LIMIT 1", [domain]) - - # Fetch row - fetched = fba.cursor.fetchone()[0] - - # DEBUG: print(f"DEBUG: fetched[{type(fetched)}]='{fetched}'") - recently = isinstance(fetched, float) and time.time() - fetched <= config.get("recheck_instance") - - # DEBUG: print(f"DEBUG: recently='{recently}' - EXIT!") - return recently - -def deobscure(char: str, domain: str, blocked_hash: str = None) -> tuple: - # DEBUG: print(f"DEBUG: char='{char}',domain='{domain}',blocked_hash='{blocked_hash}' - CALLED!") - if not isinstance(char, str): - raise ValueError(f"Parameter char[]='{type(char)}' is not 'str'") - elif char == "": - raise ValueError("Parameter 'char' is empty") - elif not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - elif not isinstance(blocked_hash, str) and blocked_hash is not None: - raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not 'str'") - - if isinstance(blocked_hash, str): - # DEBUG: print(f"DEBUG: Looking up blocked_hash='{blocked_hash}' ...") - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? 
LIMIT 1", [blocked_hash] - ) - - row = fba.cursor.fetchone() - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") - - if row is None: - # DEBUG: print(f"DEBUG: blocked_hash='{blocked_hash}' not found, trying domain='{domain}' ...") - return deobscure(char, domain) - else: - # DEBUG: print(f"DEBUG: Looking up domain='{domain}' ...") - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [domain.replace(char, "_")] - ) - - row = fba.cursor.fetchone() - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") - - # DEBUG: print(f"DEBUG: row[]='{type(row)}' - EXIT!") - return row diff --git a/fba/models/__init__.py b/fba/models/__init__.py new file mode 100644 index 0000000..bc2afbe --- /dev/null +++ b/fba/models/__init__.py @@ -0,0 +1,20 @@ +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +__all__ = [ + 'blocks', + 'error_log', + 'instances', +] diff --git a/fba/models/blocks.py b/fba/models/blocks.py new file mode 100644 index 0000000..8b6c745 --- /dev/null +++ b/fba/models/blocks.py @@ -0,0 +1,176 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import sys +import time +import validators + +from fba import blacklist +from fba import fba +from fba.helpers import tidyup + +def update_reason(reason: str, blocker: str, blocked: str, block_level: str): + # DEBUG: print(f"DEBUG: reason='{reason}',blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!") + if not isinstance(reason, str) and reason is not None: + raise ValueError(f"Parameter reason[]='{type(reason)}' is not 'str'") + elif not isinstance(blocker, str): + raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'") + elif blocker == "": + raise ValueError("Parameter 'blocker' is empty") + elif not isinstance(blocked, str): + raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not 'str'") + elif blocked == "": + raise ValueError("Parameter 'blocked' is empty") + elif not isinstance(block_level, str): + raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not 'str'") + elif block_level == "": + raise ValueError("Parameter 
'block_level' is empty") + + # DEBUG: print("DEBUG: Updating block reason:", reason, blocker, blocked, block_level) + try: + fba.cursor.execute( + "UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND (reason IS NULL OR reason = '') LIMIT 1", + ( + reason, + time.time(), + blocker, + blocked, + block_level + ), + ) + + # DEBUG: print(f"DEBUG: fba.cursor.rowcount={fba.cursor.rowcount}") + if fba.cursor.rowcount == 0: + # DEBUG: print(f"DEBUG: Did not update any rows: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',reason='{reason}' - EXIT!") + return + + except BaseException as exception: + print(f"ERROR: failed SQL query: reason='{reason}',blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(exception)}]:'{str(exception)}'") + sys.exit(255) + + # DEBUG: print("DEBUG: EXIT!") + +def update_last_seen(blocker: str, blocked: str, block_level: str): + # DEBUG: print("DEBUG: Updating last_seen for:", blocker, blocked, block_level) + if not isinstance(blocker, str): + raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'") + elif blocker == "": + raise ValueError("Parameter 'blocker' is empty") + elif not isinstance(blocked, str): + raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not 'str'") + elif blocked == "": + raise ValueError("Parameter 'blocked' is empty") + elif not isinstance(block_level, str): + raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not 'str'") + elif block_level == "": + raise ValueError("Parameter 'block_level' is empty") + + try: + fba.cursor.execute( + "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? 
LIMIT 1", + ( + time.time(), + blocker, + blocked, + block_level + ) + ) + + # DEBUG: print(f"DEBUG: fba.cursor.rowcount={fba.cursor.rowcount}") + if fba.cursor.rowcount == 0: + # DEBUG: print(f"DEBUG: Did not update any rows: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}' - EXIT!") + return + + except BaseException as exception: + print(f"ERROR: failed SQL query: blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(exception)}]:'{str(exception)}'") + sys.exit(255) + + # DEBUG: print("DEBUG: EXIT!") + +def is_instance_blocked(blocker: str, blocked: str, block_level: str) -> bool: + # DEBUG: print(f"DEBUG: blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!") + if not isinstance(blocker, str): + raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not of type 'str'") + elif blocker == "": + raise ValueError("Parameter 'blocker' is empty") + elif not isinstance(blocked, str): + raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not of type 'str'") + elif blocked == "": + raise ValueError("Parameter 'blocked' is empty") + elif not isinstance(block_level, str): + raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not of type 'str'") + elif block_level == "": + raise ValueError("Parameter 'block_level' is empty") + + fba.cursor.execute( + "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? 
LIMIT 1", + ( + blocker, + blocked, + block_level + ), + ) + + is_blocked = fba.cursor.fetchone() is not None + + # DEBUG: print(f"DEBUG: is_blocked='{is_blocked}' - EXIT!") + return is_blocked + +def add_instance(blocker: str, blocked: str, reason: str, block_level: str): + # DEBUG: print("DEBUG: blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level) + if not isinstance(blocker, str): + raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'") + elif blocker == "": + raise ValueError("Parameter 'blocker' is empty") + elif not validators.domain(blocker.split("/")[0]): + raise ValueError(f"Bad blocker='{blocker}'") + elif not isinstance(blocked, str): + raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not 'str'") + elif blocked == "": + raise ValueError("Parameter 'blocked' is empty") + elif not isinstance(block_level, str): + raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not of type 'str'") + elif block_level == "": + raise ValueError("Parameter 'block_level' is empty") + elif not validators.domain(blocked.split("/")[0]): + raise ValueError(f"Bad blocked='{blocked}'") + elif blacklist.is_blacklisted(blocker): + raise Exception(f"blocker='{blocker}' is blacklisted but function invoked") + elif blacklist.is_blacklisted(blocked): + raise Exception(f"blocked='{blocked}' is blacklisted but function invoked") + + if reason is not None: + # Maybe needs cleaning + reason = tidyup.reason(reason) + + print(f"INFO: New block: blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}'") + try: + fba.cursor.execute( + "INSERT INTO blocks (blocker, blocked, reason, block_level, first_seen, last_seen) VALUES (?, ?, ?, ?, ?, ?)", + ( + blocker, + blocked, + reason, + block_level, + time.time(), + time.time() + ), + ) + except BaseException as exception: + print(f"ERROR: failed SQL query: 
blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}',exception[{type(exception)}]:'{str(exception)}'") + sys.exit(255) + + # DEBUG: print("DEBUG: EXIT!") diff --git a/fba/models/error_log.py b/fba/models/error_log.py new file mode 100644 index 0000000..6602406 --- /dev/null +++ b/fba/models/error_log.py @@ -0,0 +1,55 @@ +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +import json +import time + +from fba import config +from fba import fba + +def add(domain: str, error: dict): + # DEBUG: print("DEBUG: domain,error[]:", domain, type(error)) + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + elif config.get("write_error_log").lower() != "true": + # DEBUG: print(f"DEBUG: Writing to error_log is disabled in configuruation file - EXIT!") + return + + # DEBUG: print("DEBUG: BEFORE error[]:", type(error)) + if isinstance(error, BaseException, error, json.decoder.JSONDecodeError): + error = f"error[{type(error)}]='{str(error)}'" + + # DEBUG: print("DEBUG: AFTER error[]:", type(error)) + if isinstance(error, str): + fba.cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[ + domain, + error, + time.time() + ]) + else: + fba.cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[ + domain, + error["status_code"], + error["error_message"], + time.time() + ]) + + # Cleanup old entries + # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})") + fba.cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")]) + + # DEBUG: print("DEBUG: EXIT!") diff --git a/fba/models/instances.py b/fba/models/instances.py new file mode 100644 index 0000000..99e61ec --- /dev/null +++ b/fba/models/instances.py @@ -0,0 +1,379 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import json +import sys +import time + +import requests +import validators + +from fba import blacklist +from fba import config +from fba import fba +from fba import federation +from fba import network + +from fba.helpers import cache + +from fba.models import error_log + +# Found info from node, such as nodeinfo URL, detection mode that needs to be +# written to database. Both arrays must be filled at the same time or else +# update_data() will fail +_pending = { + # Detection mode: 'AUTO_DISCOVERY', 'STATIC_CHECKS' or 'GENERATOR' + # NULL means all detection methods have failed (maybe still reachable instance) + "detection_mode" : {}, + # Found nodeinfo URL + "nodeinfo_url" : {}, + # Found total peers + "total_peers" : {}, + # Last fetched instances + "last_instance_fetch": {}, + # Last updated + "last_updated" : {}, + # Last blocked + "last_blocked" : {}, + # Last nodeinfo (fetched) + "last_nodeinfo" : {}, + # Last status code + "last_status_code" : {}, + # Last error details + "last_error_details" : {}, +} + +def set_data(key: str, domain: str, value: any): + # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!") + if not isinstance(key, str): + raise ValueError("Parameter key[]='{type(key)}' is not 'str'") + elif key == "": + raise ValueError("Parameter 'key' is empty") + elif not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + elif not key in _pending: + raise ValueError(f"key='{key}' not found in _pending") + elif not 
fba.is_primitive(value): + raise ValueError(f"value[]='{type(value)}' is not a primitive type") + + # Set it + _pending[key][domain] = value + + # DEBUG: print("DEBUG: EXIT!") + +def has_pending(domain: str) -> bool: + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + + has = False + for key in _pending: + # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',_pending[key]()='{len(_pending[key])}'") + if domain in _pending[key]: + has = True + break + + # DEBUG: print(f"DEBUG: has='{has}' - EXIT!") + return has + +def update_data(domain: str): + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + elif not has_pending(domain): + raise Exception(f"Domain '{domain}' has no pending instance data, but function invoked") + + # DEBUG: print(f"DEBUG: Updating instance data for domain='{domain}' ...") + sql_string = "" + fields = list() + for key in _pending: + # DEBUG: print("DEBUG: key:", key) + if domain in _pending[key]: + # DEBUG: print(f"DEBUG: Adding '{_pending[key][domain]}' for key='{key}' ...") + fields.append(_pending[key][domain]) + sql_string += f" {key} = ?," + + fields.append(time.time()) + fields.append(domain) + + if sql_string == "": + raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'") + + # DEBUG: print(f"DEBUG: sql_string='{sql_string}',fields()={len(fields)}") + sql_string = "UPDATE instances SET" + sql_string + " last_updated = ? WHERE domain = ? LIMIT 1" + # DEBUG: print("DEBUG: sql_string:", sql_string) + + try: + # DEBUG: print("DEBUG: Executing SQL:", sql_string) + fba.cursor.execute(sql_string, fields) + + # DEBUG: print(f"DEBUG: Success! 
(rowcount={fba.cursor.rowcount })") + if fba.cursor.rowcount == 0: + # DEBUG: print(f"DEBUG: Did not update any rows: domain='{domain}',fields()={len(fields)} - EXIT!") + return + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + + # DEBUG: print(f"DEBUG: Deleting _pending for domain='{domain}'") + for key in _pending: + # DEBUG: print(f"DEBUG: domain='{domain}',key='{key}'") + if domain in _pending[key]: + del _pending[key][domain] + + except BaseException as exception: + print(f"ERROR: failed SQL query: domain='{domain}',sql_string='{sql_string}',exception[{type(exception)}]:'{str(exception)}'") + sys.exit(255) + + # DEBUG: print("DEBUG: EXIT!") + +def update_last_instance_fetch(domain: str): + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + + # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain) + set_data("last_instance_fetch", domain, time.time()) + + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") + update_data(domain) + + # DEBUG: print("DEBUG: EXIT!") + +def update_last_blocked(domain: str): + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + + # DEBUG: print("DEBUG: Updating last_blocked for domain", domain) + set_data("last_blocked", domain, time.time()) + + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") + update_data(domain) + + # DEBUG: print("DEBUG: EXIT!") + +def add(domain: str, origin: str, command: str, path: str = None): + # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',command='{command}',path='{path}' - CALLED!") + if not isinstance(domain, 
str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + elif not isinstance(origin, str) and origin is not None: + raise ValueError(f"origin[]='{type(origin)}' is not 'str'") + elif origin == "": + raise ValueError("Parameter 'origin' is empty") + elif not isinstance(command, str): + raise ValueError(f"command[]='{type(command)}' is not 'str'") + elif command == "": + raise ValueError("Parameter 'command' is empty") + elif not validators.domain(domain.split("/")[0]): + raise ValueError(f"Bad domain name='{domain}'") + elif domain.endswith(".arpa"): + raise ValueError(f"Please don't crawl .arpa domains: domain='{domain}'") + elif origin is not None and not validators.domain(origin.split("/")[0]): + raise ValueError(f"Bad origin name='{origin}'") + elif blacklist.is_blacklisted(domain): + raise Exception(f"domain='{domain}' is blacklisted, but method invoked") + elif domain.find("/profile/") > 0 or domain.find("/users/") > 0: + raise Exception(f"domain='{domain}' is a single user") + + software = None + try: + # DEBUG: print("DEBUG: domain,origin,command,path:", domain, origin, command, path) + software = federation.determine_software(domain, path) + except network.exceptions as exception: + print(f"WARNING Exception '{type(exception)}' during determining software type") + + # DEBUG: print("DEBUG: Determined software:", software) + if software == "lemmy" and domain.find("/c/") > 0: + domain = domain.split("/c/")[0] + if is_registered(domain): + print(f"WARNING: domain='{domain}' already registered after cutting off user part. 
- EXIT!") + return + + print(f"INFO: Adding instance domain='{domain}' (origin='{origin}',software='{software}')") + fba.cursor.execute( + "INSERT INTO instances (domain, origin, command, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)", + ( + domain, + origin, + command, + fba.get_hash(domain), + software, + time.time() + ), + ) + + cache.set_sub_key("is_registered", domain, True) + + if has_pending(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...") + set_data("last_status_code" , domain, None) + set_data("last_error_details", domain, None) + update_data(domain) + + # DEBUG: print(f"DEBUG: Updating nodeinfo for domain='{domain}'") + update_last_nodeinfo(domain) + + # DEBUG: print("DEBUG: EXIT!") + +def update_last_nodeinfo(domain: str): + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + + # DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain) + set_data("last_nodeinfo", domain, time.time()) + set_data("last_updated" , domain, time.time()) + + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") + update_data(domain) + + # DEBUG: print("DEBUG: EXIT!") + +def update_last_error(domain: str, error: dict): + # DEBUG: print("DEBUG: domain,error[]:", domain, type(error)) + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + + # DEBUG: print("DEBUG: BEFORE error[]:", type(error)) + if isinstance(error, (BaseException, json.decoder.JSONDecodeError)): + error = f"error[{type(error)}]='{str(error)}'" + # DEBUG: print("DEBUG: AFTER error[]:", type(error)) + + if isinstance(error, str): + # DEBUG: print(f"DEBUG: Setting last_error_details='{error}'") + 
set_data("last_status_code" , domain, 999) + set_data("last_error_details", domain, error if error != "" else None) + elif isinstance(error, requests.models.Response): + # DEBUG: print(f"DEBUG: Setting last_error_details='{error.reason}'") + set_data("last_status_code" , domain, error.status_code) + set_data("last_error_details", domain, error.reason if error.reason != "" else None) + elif not isinstance(error, dict): + raise KeyError(f"Cannot handle keys in error[{type(error)}]='{error}'") + elif "status_code" in error and "error_message" in error: + # DEBUG: print(f"DEBUG: Setting last_error_details='{error['error_message']}'") + set_data("last_status_code" , domain, error["status_code"]) + set_data("last_error_details", domain, error["error_message"] if error["error_message"] != "" else None) + elif "json" in error and "error" in error["json"]: + set_data("last_status_code" , domain, error["status_code"]) + set_data("last_error_details", domain, error["json"]["error"] if error["json"]["error"] != "" else None) + + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") + update_data(domain) + + error_log.add(domain, error) + + # DEBUG: print("DEBUG: EXIT!") + +def is_registered(domain: str) -> bool: + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not cache.key_exists("is_registered"): + # DEBUG: print("DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...") + fba.cursor.execute("SELECT domain FROM instances") + + # Check Set all + cache.set_all("is_registered", fba.cursor.fetchall(), True) + + # Is cache found? 
+ registered = cache.sub_key_exists("is_registered", domain) + + # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!") + return registered + +def is_recent(domain: str) -> bool: + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + elif not is_registered(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is not registered, returning False - EXIT!") + return False + + # Query database + fba.cursor.execute("SELECT last_instance_fetch FROM instances WHERE domain = ? LIMIT 1", [domain]) + + # Fetch row + fetched = fba.cursor.fetchone()[0] + + # DEBUG: print(f"DEBUG: fetched[{type(fetched)}]='{fetched}'") + recently = isinstance(fetched, float) and time.time() - fetched <= config.get("recheck_instance") + + # DEBUG: print(f"DEBUG: recently='{recently}' - EXIT!") + return recently + +def deobscure(char: str, domain: str, blocked_hash: str = None) -> tuple: + # DEBUG: print(f"DEBUG: char='{char}',domain='{domain}',blocked_hash='{blocked_hash}' - CALLED!") + if not isinstance(char, str): + raise ValueError(f"Parameter char[]='{type(char)}' is not 'str'") + elif char == "": + raise ValueError("Parameter 'char' is empty") + elif not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + elif not isinstance(blocked_hash, str) and blocked_hash is not None: + raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not 'str'") + + if isinstance(blocked_hash, str): + # DEBUG: print(f"DEBUG: Looking up blocked_hash='{blocked_hash}' ...") + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? 
LIMIT 1", [blocked_hash] + ) + + row = fba.cursor.fetchone() + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + + if row is None: + # DEBUG: print(f"DEBUG: blocked_hash='{blocked_hash}' not found, trying domain='{domain}' ...") + return deobscure(char, domain) + else: + # DEBUG: print(f"DEBUG: Looking up domain='{domain}' ...") + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [domain.replace(char, "_")] + ) + + row = fba.cursor.fetchone() + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + + # DEBUG: print(f"DEBUG: row[]='{type(row)}' - EXIT!") + return row diff --git a/fba/network.py b/fba/network.py index 5b87c90..a86d6d4 100644 --- a/fba/network.py +++ b/fba/network.py @@ -20,7 +20,8 @@ import requests from fba import config from fba import fba -from fba import instances + +from fba.models import instances # HTTP headers for non-API requests web_headers = { diff --git a/fba/networks/friendica.py b/fba/networks/friendica.py index dc63120..6941dab 100644 --- a/fba/networks/friendica.py +++ b/fba/networks/friendica.py @@ -17,11 +17,12 @@ import bs4 from fba import config -from fba import instances from fba import network from fba.helpers import tidyup +from fba.models import instances + def fetch_blocks(domain: str) -> dict: # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if not isinstance(domain, str): diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py index cba46b1..8a0a962 100644 --- a/fba/networks/lemmy.py +++ b/fba/networks/lemmy.py @@ -17,9 +17,10 @@ from fba import config from fba import csrf from fba import federation -from fba import instances from fba import network +from fba.models import instances + def fetch_peers(domain: str) -> list: # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software='lemmy' - CALLED!") if not isinstance(domain, str): diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index 6fa4605..6470292 100644 --- 
a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -20,15 +20,16 @@ import bs4 import validators from fba import blacklist -from fba import blocks from fba import config from fba import csrf from fba import fba -from fba import instances from fba import network from fba.helpers import tidyup +from fba.models import blocks +from fba.models import instances + language_mapping = { # English -> English "Silenced instances" : "Silenced servers", @@ -63,15 +64,8 @@ def fetch_blocks_from_about(domain: str) -> dict: raise ValueError("Parameter 'domain' is empty") # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain) - blocklist = { - "Suspended servers": [], - "Filtered media" : [], - "Limited servers" : [], - "Silenced servers" : [], - } - doc = None - for path in ("/about/more", "/about"): + for path in ["/about/more", "/about"]: try: # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...") doc = bs4.BeautifulSoup( @@ -98,6 +92,13 @@ def fetch_blocks_from_about(domain: str) -> dict: print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!") return blocklist + blocklist = { + "Suspended servers": [], + "Filtered media" : [], + "Limited servers" : [], + "Silenced servers" : [], + } + for header in doc.find_all("h3"): header_text = tidyup.reason(header.text) diff --git a/fba/networks/misskey.py b/fba/networks/misskey.py index 68e5396..1dedde7 100644 --- a/fba/networks/misskey.py +++ b/fba/networks/misskey.py @@ -19,12 +19,13 @@ import json from fba import blacklist from fba import config from fba import csrf -from fba import instances from fba import network from fba.helpers import dicts from fba.helpers import tidyup +from fba.models import instances + def fetch_peers(domain: str) -> list: # DEBUG: print(f"DEBUG: domain({len(domain)})={domain} - CALLED!") if not isinstance(domain, str): @@ -202,13 +203,11 @@ def fetch_blocks(domain: str) -> dict: # DEBUG: print(f"DEBUG: 
instance[{type(instance)}]='{instance}' - suspend") if "isSuspended" in instance and instance["isSuspended"] and not dicts.has_key(blocklist["suspended"], "domain", instance["host"]): count = count + 1 - blocklist["suspended"].append( - { - "domain": tidyup.domain(instance["host"]), - # no reason field, nothing - "reason": None - } - ) + blocklist["suspended"].append({ + "domain": tidyup.domain(instance["host"]), + # no reason field, nothing + "reason": None + }) # DEBUG: print(f"DEBUG: count={count}") if count == 0: diff --git a/fba/networks/peertube.py b/fba/networks/peertube.py index c25cf8c..793fb45 100644 --- a/fba/networks/peertube.py +++ b/fba/networks/peertube.py @@ -16,9 +16,10 @@ from fba import config from fba import csrf -from fba import instances from fba import network +from fba.models import instances + def fetch_peers(domain: str) -> list: print(f"DEBUG: domain({len(domain)})={domain},software='peertube' - CALLED!") if not isinstance(domain, str): diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index 6822495..b32c327 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -16,17 +16,25 @@ import inspect +import bs4 import validators from fba import blacklist -from fba import blocks +from fba import config from fba import fba from fba import federation -from fba import instances from fba import network from fba.helpers import tidyup +from fba.models import blocks +from fba.models import instances + +language_mapping = { + # English -> English + "Reject": "Suspended servers", +} + def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") if not isinstance(domain, str): @@ -42,7 +50,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif nodeinfo_url == "": raise ValueError("Parameter 'nodeinfo_url' is empty") - # Blocks + # @TODO Unused blockdict blockdict = list() rows = None try: @@ -65,9 +73,11 @@ 
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): instances.update_last_nodeinfo(domain) data = rows["metadata"]["federation"] + found = False if "mrf_simple" in data: # DEBUG: print("DEBUG: Found mrf_simple:", domain) + found = True for block_level, blocklist in ( { **data["mrf_simple"], @@ -145,14 +155,15 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if block_level == "reject": # DEBUG: print("DEBUG: Adding to blockdict:", blocked) blockdict.append({ - "blocked": blocked, - "reason" : None + "blocked": blocked, + "reason" : None }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") - blocks.update_last_seen(domain, blocked, block_level) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + blocks.update_last_seen(domain, blocked, block_level) elif "quarantined_instances" in data: # DEBUG: print(f"DEBUG: Found 'quarantined_instances' in JSON response: domain='{domain}'") + found = True block_level = "quarantined" for blocked in data["quarantined_instances"]: @@ -214,12 +225,12 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if block_level == "reject": # DEBUG: print("DEBUG: Adding to blockdict:", blocked) blockdict.append({ - "blocked": blocked, - "reason" : None + "blocked": blocked, + "reason" : None }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") - blocks.update_last_seen(domain, blocked, block_level) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + blocks.update_last_seen(domain, blocked, block_level) else: print(f"WARNING: Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='{domain}'") @@ -229,6 +240,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Reasons if "mrf_simple_info" in data: # DEBUG: print("DEBUG: Found mrf_simple_info:", 
domain) + found = True for block_level, info in ( { **data["mrf_simple_info"], @@ -314,6 +326,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]: # DEBUG: print(f"DEBUG: Found 'quarantined_instances_info' in JSON response: domain='{domain}'") + found = True block_level = "quarantined" #print(data["quarantined_instances_info"]) @@ -385,5 +398,165 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): else: print(f"WARNING: Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='{domain}'") + if not found: + # DEBUG: print(f"DEBUG: Did not find any useable JSON elements, domain='{domain}', continuing with /about page ...") + blocklist = fetch_blocks_from_about(domain) + + # DEBUG: print(f"DEBUG: blocklist()={len(blocklist)}") + if len(blocklist) > 0: + print(f"INFO: Checking {len(blocklist)} record(s) ...") + for block_level in blocklist: + # DEBUG: print(f"DEBUG: block_level='{block_level}'") + rows = blocklist[block_level] + # DEBUG: print(f"DEBUG: rows['{type(rows)}]()={len(rows)}'") + for record in rows: + # DEBUG: print(f"DEBUG: record[]='{type(record)}'") + blocked = tidyup.domain(record["blocked"]) + reason = tidyup.reason(record["reason"]) + # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!") + + if blocked == "": + print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) + continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # Obscured domain name with no hash + row = instances.deobscure("*", blocked) + + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' 
de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + elif blocked.count("?") > 0: + # Obscured domain name with no hash + row = instances.deobscure("?", blocked) + + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + + # DEBUG: print(f"DEBUG: blocked='{blocked}'") + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") + continue + elif blocked.endswith(".arpa"): + print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + continue + elif not instances.is_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) + + if not blocks.is_instance_blocked(domain, blocked, block_level): + # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) + blocks.add_instance(domain, blocked, reason, block_level) + + if block_level == "reject": + # DEBUG: print("DEBUG: Adding to blockdict:", blocked) + blockdict.append({ + "blocked": blocked, + "reason" : reason + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + blocks.update_reason(reason, domain, blocked, block_level) + fba.connection.commit() # DEBUG: print("DEBUG: EXIT!") + +def fetch_blocks_from_about(domain: str) -> dict: + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is 
empty") + + # DEBUG: print(f"DEBUG: Fetching mastodon blocks from domain='{domain}'") + doc = None + for path in ["/instance/about/index.html"]: + try: + # Resetting doc type + doc = None + + # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...") + response = network.fetch_response( + domain, + path, + network.web_headers, + (config.get("connection_timeout"), config.get("read_timeout")) + ) + + # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code='{response.status_code}',response.text()={len(response.text)}") + if not response.ok or response.text.strip() == "": + print(f"WARNING: path='{path}' does not exist on domain='{domain}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: Parsing response.text()={len(response.text)} Bytes ...") + doc = bs4.BeautifulSoup( + response.text, + "html.parser", + ) + + # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + if doc.find("h2") is not None: + # DEBUG: print(f"DEBUG: Found 'h2' header in path='{path}' - BREAK!") + break + + except BaseException as exception: + print("ERROR: Cannot fetch from domain:", domain, exception) + instances.update_last_error(domain, exception) + break + + # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + if doc is None: + print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!") + return blocklist + + blocklist = { + "Suspended servers": [], + "Filtered media" : [], + "Limited servers" : [], + "Silenced servers" : [], + } + + for header in doc.find_all("h2"): + header_text = tidyup.reason(header.text) + + # DEBUG: print(f"DEBUG: header_text='{header_text}' - BEFORE!") + if header_text in language_mapping: + # DEBUG: print(f"DEBUG: header_text='{header_text}' - FOUND!") + header_text = language_mapping[header_text] + else: + print(f"WARNING: header_text='{header_text}' not found in language mapping table") + + # DEBUG: print(f"DEBUG: header_text='{header_text} - AFTER!'") + if header_text in blocklist or header_text.lower() in blocklist: + # 
replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu + # DEBUG: print(f"DEBUG: Found header_text='{header_text}', importing domain blocks ...") + for line in header.find_next("table").find_all("tr")[1:]: + # DEBUG: print(f"DEBUG: line[]='{type(line)}'") + blocklist[header_text].append({ + "blocked": tidyup.domain(line.find_all("td")[0].text), + "reason" : tidyup.reason(line.find_all("td")[1].text), + }) + else: + print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}") + + # DEBUG: print(f"DEBUG: Returning blocklist for domain='{domain}'") + return { + "reject" : blocklist["Suspended servers"], + "media_removal" : blocklist["Filtered media"], + "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"], + }