From: Roland Häder Date: Wed, 16 Aug 2023 22:56:19 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=1b92010670a38dadfe0d302d3fee7ae32e1006e8;p=fba.git Continued: - no, nope: validators.hostname() was a bad idea, it also let IP addresses and local host names in as well - added command remove_invalid to remove those from database - renamed recheck.sh -> nodeinfo.sh --- diff --git a/fba/boot.py b/fba/boot.py index 55e9fb4..6c14c2a 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -231,6 +231,13 @@ def init_parser(): parser.add_argument("--domain", help="Instance name (aka. 'relay')") parser.add_argument("--force", action="store_true", help="Forces update of data, no matter what.") + ### Remove invalid domains ### + parser = subparser_command.add_parser( + "remove_invalid", + help="Removes invalid domains.", + ) + parser.set_defaults(command=commands.remove_invalid) + logger.debug("EXIT!") def run_command(): diff --git a/fba/commands.py b/fba/commands.py index 58e8686..b021cab 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -64,7 +64,7 @@ logger = logging.getLogger(__name__) def check_instance(args: argparse.Namespace) -> int: logger.debug("args.domain='%s' - CALLED!", args.domain) status = 0 - if not validators.hostname(args.domain): + if not validators.domain(args.domain): logger.warning("args.domain='%s' is not valid", args.domain) status = 100 elif blacklist.is_blacklisted(args.domain): @@ -266,7 +266,7 @@ def fetch_blocks(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) if args.domain is not None and args.domain != "": logger.debug("args.domain='%s' - checking ...", args.domain) - if not validators.hostname(args.domain): + if not validators.domain(args.domain): logger.warning("args.domain='%s' is not valid.", args.domain) return 100 elif blacklist.is_blacklisted(args.domain): @@ -914,7 +914,7 @@ def fetch_instances(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", 
type(args)) logger.debug("args.domain='%s' - checking ...", args.domain) - if not validators.hostname(args.domain): + if not validators.domain(args.domain): logger.warning("args.domain='%s' is not valid.", args.domain) return 100 elif blacklist.is_blacklisted(args.domain): @@ -1076,7 +1076,7 @@ def fetch_oliphant(args: argparse.Namespace) -> int: domain = utils.deobfuscate(domain, block["blocker"]) logger.debug("domain='%s' - AFTER!", domain) - if not validators.hostname(domain): + if not validators.domain(domain): logger.debug("domain='%s' is not a valid domain - SKIPPED!") continue elif blacklist.is_blacklisted(domain): @@ -1382,7 +1382,7 @@ def fetch_joinfediverse(args: argparse.Namespace) -> int: if not isinstance(text, str): logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text)) continue - elif validators.hostname(text.strip()): + elif validators.domain(text.strip()): logger.debug("text='%s' is a domain - SKIPPED!", text.strip()) continue @@ -1517,7 +1517,7 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain): database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain]) - elif isinstance(args.software, str) and args.software != "" and validators.hostname(args.software) == args.software: + elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software: database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software]) else: database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1") @@ -2023,3 +2023,29 @@ def convert_idna(args: argparse.Namespace) -> int: logger.debug("Success! 
def remove_invalid(args: argparse.Namespace) -> int:
    """Remove every instance whose stored domain is not a valid domain name.

    All rows of table ``instances`` are loaded and the part of each domain
    before the first "/" is checked with ``validators.domain()``. When that
    check yields a falsy result, all ``blocks`` rows that reference the
    domain as blocker or blocked are deleted, followed by the ``instances``
    row itself. The deletions are committed once after the scan and the
    database is vacuumed afterwards.

    Args:
        args: Parsed command-line arguments; only their type is logged.

    Returns:
        Always 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # NOTE(review): the lock is acquired but not released in this function;
    # presumably the caller releases it on shutdown - confirm.
    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    rows = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s'", row["domain"])
        if not validators.domain(row["domain"].split("/")[0]):
            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])

            # No LIMIT clause here: SQLite only accepts "DELETE ... LIMIT"
            # when built with SQLITE_ENABLE_UPDATE_DELETE_LIMIT and reports
            # a syntax error otherwise.
            database.cursor.execute("DELETE FROM instances WHERE domain = ?", [row["domain"]])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Executed after the commit so the removed rows' space can be reclaimed.
    logger.info("Vaccum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")
    return 0
#!/bin/sh
#
# Runs "./fba.py update_nodeinfo" once for every domain in a list.
#
# The list is either read from the file given as first argument or selected
# from blocks.db by one of the mode switches shown in the usage text. Without
# a recognized first argument, instances with a nodeinfo URL but no software
# are selected and the first argument (if any) is passed on to fba.py.
# When a mode was recognized, an optional second argument is passed on
# to fba.py instead.

MODE=""
if [ "$1" = "--help" ]
then
	echo "Usage: $0 [file|--software|--software2|--nodeinfo|--generator|--detection|--no-auto|--no-auto2|--timeout]"
	exit 255
elif [ -n "$1" ] && [ -f "$1" ]
then
	# First argument is an existing file: take the domains from it
	DOMAINS=$(cat "$1")
	MODE="file"
elif [ "$1" = "--software" ]
then
	DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NULL ORDER BY last_updated ASC;")
	MODE="software"
elif [ "$1" = "--nodeinfo" ]
then
	DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NULL AND nodeinfo_url IS NOT NULL ORDER BY last_updated ASC;")
	MODE="nodeinfo"
elif [ "$1" = "--detection" ]
then
	DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC;")
	MODE="detection"
elif [ "$1" = "--no-auto" ]
then
	DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;")
	MODE="noauto"
elif [ "$1" = "--no-auto2" ]
then
	DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus') AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;")
	MODE="noauto2"
elif [ "$1" = "--timeout" ]
then
	DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE last_error_details LIKE '%Timeout%' ORDER BY last_updated ASC;")
	# Own mode name so the "No domains found" message reports the right mode
	MODE="timeout"
elif [ "$1" = "--software2" ]
then
	DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NOT NULL AND detection_mode IS NULL ORDER BY last_updated ASC;")
	MODE="software2"
elif [ "$1" = "--generator" ]
then
	DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode='GENERATOR' ORDER BY last_updated ASC;")
	MODE="generator"
else
	# No (recognized) mode: MODE stays empty, see loop below
	DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NULL AND nodeinfo_url IS NOT NULL ORDER BY last_updated ASC;")
fi

if [ -z "${DOMAINS}" ]
then
	echo "$0: No domains found! MODE='${MODE}'"
	exit 255
fi

# DOMAINS is intentionally unquoted: word splitting yields one domain per turn
for DOMAIN in ${DOMAINS};
do
	echo "$0: DOMAIN='${DOMAIN}'"
	if [ -n "$1" ] && [ -z "${MODE}" ]
	then
		# Unrecognized first argument: hand it over to fba.py
		./fba.py update_nodeinfo --domain="${DOMAIN}" "$1"
	elif [ -n "$2" ]
	then
		# Mode was recognized: second argument is the extra one for fba.py
		./fba.py update_nodeinfo --domain="${DOMAIN}" "$2"
	else
		./fba.py update_nodeinfo --domain="${DOMAIN}"
	fi
done
echo "$0: All done."
diff --git a/recheck.sh b/recheck.sh deleted file mode 100755 index 1c7fdaa..0000000 --- a/recheck.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/sh - -MODE="" -if [ "$1" = "--help" ] -then - echo "Usage: $ [file|--software|--software2|--nodeinfo|--generator|--detection|--no-auto|--no-auto2|--timeout]" - exit 255 -elif [ -n "$1" -a -f "$1" ] -then - DOMAINS=$(cat "$1") - MODE="file" -elif [ "$1" = "--software" ] -then - DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NULL ORDER BY last_updated ASC;") - MODE="software" -elif [ "$1" = "--nodeinfo" ] -then - DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NULL AND nodeinfo_url IS NOT NULL ORDER BY last_updated ASC;") - MODE="nodeinfo" -elif [ "$1" = "--detection" ] -then - DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC;") - MODE="detection" -elif [ "$1" = "--no-auto" ] -then - DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;") - MODE="noauto" -elif [ "$1" = "--no-auto2" ] -then - DOMAINS=`sqlite3 blocks.db "SELECT domain FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus') AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;"` - MODE="noauto2" -elif [ "$1" = "--timeout" ] -then - DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE last_error_details LIKE '%Timeout%' ORDER BY last_updated ASC;") - MODE="software" -elif [ "$1" = "--software2" ] -then - DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NOT NULL AND detection_mode IS NULL ORDER BY last_updated ASC;") - MODE="software2" -elif [ "$1" = "--generator" ] -then - DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode='GENERATOR' ORDER BY last_updated 
ASC;") - MODE="generator" -else - DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NULL AND nodeinfo_url IS NOT NULL ORDER BY last_updated ASC;") -fi - -if [ -z "${DOMAINS}" ] -then - echo "$0: No domains found! MODE='${MODE}'" - exit 255 -fi - -for DOMAIN in ${DOMAINS}; -do - echo "$0: DOMAIN='${DOMAIN}'" - if [ -n "$1" -a -z "${MODE}" ] - then - ./fba.py update_nodeinfo --domain=${DOMAIN} "$1" - elif [ -n "$2" ] - then - ./fba.py update_nodeinfo --domain=${DOMAIN} "$2" - else - ./fba.py update_nodeinfo --domain=${DOMAIN} - fi -done -echo "$0: All done."