X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fba%2Fcommands.py;h=ef8ee42f8251967d773f651750836b7510429cf3;hb=a275c7d2091e905761f8b3e0a019a370b2f2a92e;hp=d9a00fc031df33c6b5d04898f2992a0db1b725b5;hpb=b3011a99b77acc8b0cca678d3ce45924fb78d92a;p=fba.git

diff --git a/fba/commands.py b/fba/commands.py
index d9a00fc..ef8ee42 100644
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -29,11 +29,11 @@ import markdown
 import reqto
 import validators
 
-from fba import csrf
 from fba import database
 from fba import utils
 
 from fba.helpers import blacklist
+from fba.helpers import blocklists
 from fba.helpers import config
 from fba.helpers import cookies
 from fba.helpers import dicts as dict_helper
@@ -43,6 +43,7 @@ from fba.helpers import processing
 from fba.helpers import software as software_helper
 from fba.helpers import tidyup
 
+from fba.http import csrf
 from fba.http import federation
 from fba.http import network
 
@@ -62,6 +63,7 @@ logger = logging.getLogger(__name__)
 
 def check_instance(args: argparse.Namespace) -> int:
     logger.debug("args.domain='%s' - CALLED!", args.domain)
+    status = 0
 
     if not validators.domain(args.domain):
         logger.warning("args.domain='%s' is not valid", args.domain)
@@ -110,7 +112,7 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int:
 
     if sources.is_recent(source_domain):
         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
-        return 0
+        return 1
     else:
         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
         sources.update(source_domain)
@@ -131,7 +133,7 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int:
             (config.get("connection_timeout"), config.get("read_timeout"))
         )
 
-        logger.debug("JSON API returned %d elements", len(fetched))
+        logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
         if "error_message" in fetched:
             logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
             return 101
@@ -146,8 +148,8 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int:
             if "domain" not in row:
                 logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                 continue
-            elif row["domain"] == "":
-                logger.debug("row[domain] is empty - SKIPPED!")
+            elif row["domain"] in [None, ""]:
+                logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
                 continue
 
             logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
@@ -183,7 +185,7 @@ def fetch_bkali(args: argparse.Namespace) -> int:
     source_domain = "gql.api.bka.li"
     if sources.is_recent(source_domain):
         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
-        return 0
+        return 1
     else:
         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
         sources.update(source_domain)
@@ -222,8 +224,8 @@ def fetch_bkali(args: argparse.Namespace) -> int:
             if "domain" not in entry:
                 logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
                 continue
-            elif entry["domain"] == "":
-                logger.debug("entry[domain] is empty - SKIPPED!")
+            elif entry["domain"] in [None, ""]:
+                logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
                 continue
             elif not domain_helper.is_wanted(entry["domain"]):
                 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
@@ -252,7 +254,7 @@ def fetch_bkali(args: argparse.Namespace) -> int:
 
         try:
             logger.info("Fetching instances from domain='%s' ...", domain)
-            federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
+            federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
         except network.exceptions as exception:
             logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
             instances.set_last_error(domain, exception)
@@ -282,78 +284,77 @@ def fetch_blocks(args: argparse.Namespace) -> int:
         # Re-check single domain
         logger.debug("Querying database for args.domain='%s' ...", args.domain)
         database.cursor.execute(
-            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
+            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain]
         )
     elif args.software is not None and args.software != "":
         # Re-check single software
         logger.debug("Querying database for args.software='%s' ...", args.software)
         database.cursor.execute(
-            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
+            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
         )
-    elif args.force:
-        # Re-check all
-        logger.debug("Re-checking all instances ...")
+    elif args.only_none:
+        # Check only entries with total_blocked=None
         database.cursor.execute(
-            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC"
+            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND total_blocks IS NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
         )
     else:
         # Re-check after "timeout" (aka. minimum interval)
        database.cursor.execute(
-            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
+            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
         )
 
     rows = database.cursor.fetchall()
 
     logger.info("Checking %d entries ...", len(rows))
     for blocker, software, origin, nodeinfo_url in rows:
         logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
-        blocker = tidyup.domain(blocker)
-        logger.debug("blocker='%s' - AFTER!", blocker)
 
-        if blocker == "":
-            logger.warning("blocker is now empty!")
-            continue
-        elif nodeinfo_url is None or nodeinfo_url == "":
-            logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
+        if not domain_helper.is_wanted(blocker):
+            logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
             continue
-        elif not domain_helper.is_wanted(blocker):
-            logger.debug("blocker='%s' is not wanted - SKIPPED!", blocker)
+        elif not args.force and instances.is_recent(blocker, "last_blocked"):
+            logger.debug("blocker='%s' has been recently accessed - SKIPPED!", blocker)
             continue
 
-        logger.debug("blocker='%s'", blocker)
+        logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
         instances.set_last_blocked(blocker)
         instances.set_has_obfuscation(blocker, False)
 
-        blocking = list()
-        if software == "pleroma":
-            logger.info("blocker='%s',software='%s'", blocker, software)
-            blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
-            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
-        elif software == "mastodon":
-            logger.info("blocker='%s',software='%s'", blocker, software)
-            blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
-            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
-        elif software == "lemmy":
-            logger.info("blocker='%s',software='%s'", blocker, software)
-            blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
-            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
-        elif software == "friendica":
-            logger.info("blocker='%s',software='%s'", blocker, software)
-            blocking = friendica.fetch_blocks(blocker)
-            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
-        elif software == "misskey":
-            logger.info("blocker='%s',software='%s'", blocker, software)
-            blocking = misskey.fetch_blocks(blocker)
-            logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
-        else:
-            logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
+        # c.s isn't part of oliphant's "hidden" blocklists
+        if blocker == "chaos.social" or software_helper.is_relay(software) or blocklists.has(blocker):
+            logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs, fetch_oliphant, fetch_csv instead!", blocker)
+            continue
 
-        logger.debug("blocker='%s'", blocker)
-        if blocker != "chaos.social":
-            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
-            instances.set_total_blocks(blocker, blocking)
+        logger.debug("Invoking federation.fetch_blocks(%s) ...", blocker)
+        blocking = federation.fetch_blocks(blocker)
+
+        logger.debug("blocker='%s',software='%s',blocking()=%d", blocker, software, len(blocking))
+        if len(blocking) == 0:
+            logger.debug("blocker='%s',software='%s' - fetching blocklist ...", blocker, software)
+            if software == "pleroma":
+                blocking = pleroma.fetch_blocks(blocker)
+                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+            elif software == "mastodon":
+                blocking = mastodon.fetch_blocks(blocker)
+                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+            elif software == "lemmy":
+                blocking = lemmy.fetch_blocks(blocker)
+                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+            elif software == "friendica":
+                blocking = friendica.fetch_blocks(blocker)
+                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+            elif software == "misskey":
+                blocking = misskey.fetch_blocks(blocker)
+                logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+            else:
+                logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
+
+        logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
+        instances.set_total_blocks(blocker, blocking)
 
-        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
         blockdict = list()
+        deobfuscated = obfuscated = 0
+
+        logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
 
         for block in blocking:
             logger.debug("blocked='%s',block_level='%s',reason='%s'", block["blocked"], block["block_level"], block["reason"])
@@ -366,12 +367,15 @@ def fetch_blocks(args: argparse.Namespace) -> int:
             block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
             logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
 
-            if block["blocked"] == "":
-                logger.warning("blocked is empty, blocker='%s'", blocker)
+            if block["blocked"] in [None, ""]:
+                logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
                 continue
             elif block["blocked"].endswith(".onion"):
                 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                 continue
+            elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") != "true":
+                logger.debug("blocked='%s' is an I2P domain - SKIPPED", block["blocked"])
+                continue
             elif block["blocked"].endswith(".arpa"):
                 logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
                 continue
@@ -380,38 +384,42 @@ def fetch_blocks(args: argparse.Namespace) -> int:
                 continue
             elif block["blocked"].find("*") >= 0:
                 logger.debug("blocker='%s' uses obfuscated domains", blocker)
+                instances.set_has_obfuscation(blocker, True)
+                obfuscated = obfuscated + 1
 
                 # Some friendica servers also obscure domains without hash
-                row = instances.deobfuscate("*", block["blocked"], block["hash"] if "hash" in block else None)
+                row = instances.deobfuscate("*", block["blocked"], block["digest"] if "digest" in block else None)
 
                 logger.debug("row[]='%s'", type(row))
                 if row is None:
                     logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
-                    instances.set_has_obfuscation(blocker, True)
                     continue
 
+                deobfuscated = deobfuscated + 1
                 block["blocked"] = row["domain"]
                 origin           = row["origin"]
                 nodeinfo_url     = row["nodeinfo_url"]
             elif block["blocked"].find("?") >= 0:
                 logger.debug("blocker='%s' uses obfuscated domains", blocker)
+                instances.set_has_obfuscation(blocker, True)
+                obfuscated = obfuscated + 1
 
                 # Some obscure them with question marks, not sure if that's dependent on version or not
-                row = instances.deobfuscate("?", block["blocked"], block["hash"] if "hash" in block else None)
+                row = instances.deobfuscate("?", block["blocked"], block["digest"] if "digest" in block else None)
 
                 logger.debug("row[]='%s'", type(row))
                 if row is None:
                     logger.warning("Cannot deobfuscate blocked='%s',blocker='%s',software='%s' - SKIPPED!", block["blocked"], blocker, software)
-                    instances.set_has_obfuscation(blocker, True)
                     continue
 
+                deobfuscated = deobfuscated + 1
                 block["blocked"] = row["domain"]
                 origin           = row["origin"]
                 nodeinfo_url     = row["nodeinfo_url"]
 
-            logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
-            if block["blocked"] == "":
-                logger.debug("block[blocked] is empty - SKIPPED!")
+            logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
+            if block["blocked"] in [None, ""]:
+                logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
                 continue
 
             logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
@@ -440,10 +448,11 @@ def fetch_blocks(args: argparse.Namespace) -> int:
                 logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
                 cookies.clear(block["blocked"])
 
-        logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
-        if instances.has_pending(blocker):
-            logger.debug("Flushing updates for blocker='%s' ...", blocker)
-            instances.update_data(blocker)
+        logger.info("blocker='%s' has %d obfuscated domain(s) and %d of them could be deobfuscated.", blocker, obfuscated, deobfuscated)
+        instances.set_obfuscated_blocks(blocker, obfuscated)
+
+        logger.debug("Flushing updates for blocker='%s' ...", blocker)
+        instances.update(blocker)
 
         logger.debug("Invoking commit() ...")
         database.connection.commit()
@@ -468,7 +477,7 @@ def fetch_observer(args: argparse.Namespace) -> int:
     source_domain = "fediverse.observer"
     if sources.is_recent(source_domain):
         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
-        return 0
+        return 1
     else:
         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
         sources.update(source_domain)
@@ -510,7 +519,8 @@ def fetch_observer(args: argparse.Namespace) -> int:
 
     logger.info("Fetching %d different table data ...", len(types))
     for software in types:
-        logger.debug("software='%s' - BEFORE!", software)
+        logger.debug("software='%s'", software)
+
         if args.software is not None and args.software != software:
            logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
            continue
@@ -536,10 +546,12 @@ def fetch_observer(args: argparse.Namespace) -> int:
         for item in items:
             logger.debug("item[]='%s'", type(item))
             domain = item.decode_contents()
+            logger.debug("domain[%s]='%s'", type(domain), domain)
+            domain = tidyup.domain(domain) if domain not in [None, ""] else None
             logger.debug("domain='%s' - AFTER!", domain)
 
-            if domain == "":
-                logger.debug("domain is empty - SKIPPED!")
+            if domain in [None, ""]:
+                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                 continue
 
             logger.debug("domain='%s' - BEFORE!", domain)
@@ -552,11 +564,7 @@ def fetch_observer(args: argparse.Namespace) -> int:
             elif instances.is_registered(domain):
                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                 continue
-            elif instances.is_recent(domain):
-                logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
-                continue
 
-            software = software_helper.alias(software)
             logger.info("Fetching instances for domain='%s'", domain)
             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
@@ -572,7 +580,7 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int:
     source_domain = "wiki.todon.eu"
     if sources.is_recent(source_domain):
         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
-        return 0
+        return 1
     else:
         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
         sources.update(source_domain)
@@ -624,7 +632,13 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int:
                 logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
                 instances.set_last_error(blocked, exception)
 
-            if blocks.is_instance_blocked(blocker, blocked, block_level):
+            if not domain_helper.is_wanted(blocked):
+                logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
+                continue
+            elif not domain_helper.is_wanted(blocker):
+                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
+                continue
+            elif blocks.is_instance_blocked(blocker, blocked, block_level):
                 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                 continue
@@ -647,7 +661,7 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int:
     logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
     if instances.has_pending(blocker):
         logger.debug("Flushing updates for blocker='%s' ...", blocker)
-        instances.update_data(blocker)
+        instances.update(blocker)
 
     logger.debug("Success! - EXIT!")
     return 0
@@ -686,7 +700,7 @@ def fetch_cs(args: argparse.Namespace):
     source_domain = "raw.githubusercontent.com"
     if sources.is_recent(source_domain):
         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
-        return 0
+        return 1
     else:
         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
         sources.update(source_domain)
@@ -754,7 +768,7 @@ def fetch_cs(args: argparse.Namespace):
         logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
         if instances.has_pending(blocker):
             logger.debug("Flushing updates for blocker='%s' ...", blocker)
-            instances.update_data(blocker)
+            instances.update(blocker)
 
     logger.debug("Success! - EXIT!")
     return 0
@@ -768,30 +782,33 @@ def fetch_fba_rss(args: argparse.Namespace) -> int:
     locking.acquire()
 
     components = urlparse(args.feed)
+    domain = components.netloc.lower().split(":")[0]
 
-    if sources.is_recent(components.netloc):
-        logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
+    logger.debug("domain='%s'", domain)
+    if sources.is_recent(domain):
+        logger.info("API from domain='%s' has recently being accessed - EXIT!", domain)
         return 0
     else:
-        logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
-        sources.update(components.netloc)
+        logger.debug("domain='%s' has not been recently used, marking ...", domain)
+        sources.update(domain)
 
     logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
     response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code < 300 and len(response.text) > 0:
+    if response.ok and response.status_code == 200 and len(response.text) > 0:
         logger.debug("Parsing RSS feed (%d Bytes) ...", len(response.text))
         rss = atoma.parse_rss_bytes(response.content)
 
         logger.debug("rss[]='%s'", type(rss))
         for item in rss.items:
             logger.debug("item[%s]='%s'", type(item), item)
-            domain = tidyup.domain(item.link.split("=")[1])
+            domain = item.link.split("=")[1]
+            domain = tidyup.domain(domain) if domain not in [None, ""] else None
             logger.debug("domain='%s' - AFTER!", domain)
 
-            if domain == "":
-                logger.debug("domain is empty - SKIPPED!")
+            if domain in [None, ""]:
+                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                 continue
 
             logger.debug("domain='%s' - BEFORE!", domain)
@@ -837,22 +854,28 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
     locking.acquire()
 
     source_domain = "ryona.agency"
+    feed = f"https://{source_domain}/users/fba/feed.atom"
+
+    logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
+    if args.feed is not None and validators.url(args.feed):
+        logger.debug("Setting feed='%s' ...", args.feed)
+        feed = str(args.feed)
+        source_domain = urlparse(args.feed).netloc
+
     if sources.is_recent(source_domain):
         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
-        return 0
+        return 1
     else:
         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
         sources.update(source_domain)
 
-    feed = f"https://{source_domain}/users/fba/feed.atom"
-
     domains = list()
 
     logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
     response = utils.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code < 300 and len(response.text) > 0:
+    if response.ok and response.status_code == 200 and len(response.text) > 0:
         logger.debug("Parsing ATOM feed (%d Bytes) ...", len(response.text))
         atom = atoma.parse_atom_bytes(response.content)
@@ -861,15 +884,16 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
             logger.debug("entry[]='%s'", type(entry))
             doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
             logger.debug("doc[]='%s'", type(doc))
+
             for element in doc.findAll("a"):
                 logger.debug("element[]='%s'", type(element))
                 for href in element["href"].split(","):
                     logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
-                    domain = tidyup.domain(href)
+                    domain = tidyup.domain(href) if href not in [None, ""] else None
 
                     logger.debug("domain='%s' - AFTER!", domain)
-                    if domain == "":
-                        logger.debug("domain is empty - SKIPPED!")
+                    if domain in [None, ""]:
+                        logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                         continue
 
                     logger.debug("domain='%s' - BEFORE!", domain)
@@ -922,15 +946,33 @@ def fetch_instances(args: argparse.Namespace) -> int:
     logger.debug("Invoking locking.acquire() ...")
     locking.acquire()
 
+    # Initialize values
+    domain = tidyup.domain(args.domain)
+    origin = software = None
+
+    # Fetch record
+    database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
+    row = database.cursor.fetchone()
+    if row is not None:
+        origin = row["origin"]
+        software = row["software"]
+
+    if software is None:
+        logger.warning("args.domain='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force to get it updated.", args.domain, args.domain)
+        return 102
+    elif software_helper.is_relay(software):
+        logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
+        return 103
+
     # Initial fetch
     try:
-        logger.info("Fetching instances from args.domain='%s' ...", args.domain)
-        federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
+        logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
+        federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
     except network.exceptions as exception:
         logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
         instances.set_last_error(args.domain, exception)
-        instances.update_data(args.domain)
-        return 100
+        instances.update(args.domain)
+        return 104
 
     if args.single:
         logger.debug("Not fetching more instances - EXIT!")
@@ -938,7 +980,7 @@ def fetch_instances(args: argparse.Namespace) -> int:
 
     # Loop through some instances
     database.cursor.execute(
-        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
+        "SELECT domain, origin, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
     )
 
     rows = database.cursor.fetchall()
@@ -958,8 +1000,8 @@ def fetch_instances(args: argparse.Namespace) -> int:
             continue
 
         try:
-            logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
-            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
+            logger.info("Fetching instances for domain='%s',origin='%s',software='%s' ...", domain, row["origin"], row["software"])
+            federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name)
         except network.exceptions as exception:
             logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
             instances.set_last_error(domain, exception)
@@ -967,6 +1009,27 @@ def fetch_instances(args: argparse.Namespace) -> int:
     logger.debug("Success - EXIT!")
     return 0
 
+def fetch_csv(args: argparse.Namespace) -> int:
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    logger.info("Checking %d CSV files ...", len(blocklists.csv_files))
+    for block in blocklists.csv_files:
+        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
+
+        # Is domain given and not equal blocker?
+        if isinstance(args.domain, str) and args.domain != block["blocker"]:
+            logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
+            continue
+
+        logger.debug("Invoking processing.csv_block(%s, %s, fetch_csv) ...", block["blocker"], block["csv_url"])
+        processing.csv_block(block["blocker"], block["csv_url"], inspect.currentframe().f_code.co_name)
+
+    logger.debug("Success - EXIT!")
+    return 0
+
 def fetch_oliphant(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
@@ -976,7 +1039,7 @@ def fetch_oliphant(args: argparse.Namespace) -> int:
     source_domain = "codeberg.org"
     if sources.is_recent(source_domain):
         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
-        return 0
+        return 1
     else:
         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
         sources.update(source_domain)
@@ -984,178 +1047,18 @@ def fetch_oliphant(args: argparse.Namespace) -> int:
     # Base URL
     base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
 
-    # URLs to fetch
-    blocklists = (
-        {
-            "blocker": "artisan.chat",
-            "csv_url": "mastodon/artisan.chat.csv",
-        },{
-            "blocker": "mastodon.art",
-            "csv_url": "mastodon/mastodon.art.csv",
-        },{
-            "blocker": "pleroma.envs.net",
-            "csv_url": "mastodon/pleroma.envs.net.csv",
-        },{
-            "blocker": "oliphant.social",
-            "csv_url": "mastodon/_unified_tier3_blocklist.csv",
-        },{
-            "blocker": "mastodon.online",
-            "csv_url": "mastodon/mastodon.online.csv",
-        },{
-            "blocker": "mastodon.social",
-            "csv_url": "mastodon/mastodon.social.csv",
-        },{
-            "blocker": "mastodon.social",
-            "csv_url": "other/missing-tier0-mastodon.social.csv",
-        },{
-            "blocker": "rage.love",
-            "csv_url": "mastodon/rage.love.csv",
-        },{
-            "blocker": "sunny.garden",
-            "csv_url": "mastodon/sunny.garden.csv",
-        },{
-            "blocker": "sunny.garden",
-            "csv_url": "mastodon/gardenfence.csv",
-        },{
-            "blocker": "solarpunk.moe",
-            "csv_url": "mastodon/solarpunk.moe.csv",
-        },{
-            "blocker": "toot.wales",
-            "csv_url": "mastodon/toot.wales.csv",
-        },{
-            "blocker": "union.place",
-            "csv_url": "mastodon/union.place.csv",
-        },{
-            "blocker": "oliphant.social",
-            "csv_url": "mastodon/birdsite.csv",
-        }
-    )
-
-    domains = list()
-
-    logger.debug("Downloading %d files ...", len(blocklists))
-    for block in blocklists:
+    logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
+    for block in blocklists.oliphant_blocklists:
         # Is domain given and not equal blocker?
+        logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
         if isinstance(args.domain, str) and args.domain != block["blocker"]:
             logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
             continue
-        elif args.domain in domains:
-            logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
-            continue
-
-        instances.set_last_blocked(block["blocker"])
-
-        # Fetch this URL
-        logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
-        response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
-
-        logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
-        if not response.ok or response.status_code >= 300 or response.content == "":
-            logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
-            continue
-
-        logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
-        reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
-
-        blockdict = list()
-
-        cnt = 0
-        for row in reader:
-            logger.debug("row[%s]='%s'", type(row), row)
-            domain = severity = None
-            reject_media = reject_reports = False
-
-            if "#domain" in row:
-                domain = row["#domain"]
-            elif "domain" in row:
-                domain = row["domain"]
-            else:
-                logger.debug("row='%s' does not contain domain column", row)
-                continue
-
-            if "#severity" in row:
-                severity = blocks.alias_block_level(row["#severity"])
-            elif "severity" in row:
-                severity = blocks.alias_block_level(row["severity"])
-            else:
-                logger.debug("row='%s' does not contain severity column", row)
-                continue
+        url = f"{base_url}/{block['csv_url']}"
 
-            if "#reject_media" in row and row["#reject_media"].lower() == "true":
-                reject_media = True
-            elif "reject_media" in row and row["reject_media"].lower() == "true":
-                reject_media = True
-
-            if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
-                reject_reports = True
-            elif "reject_reports" in row and row["reject_reports"].lower() == "true":
-                reject_reports = True
-
-            cnt = cnt + 1
-            logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
-            if domain == "":
-                logger.debug("domain is empty - SKIPPED!")
-                continue
-            elif domain.endswith(".onion"):
-                logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
-                continue
-            elif domain.endswith(".arpa"):
-                logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
-                continue
-            elif domain.endswith(".tld"):
-                logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
-                continue
-            elif domain.find("*") >= 0 or domain.find("?") >= 0:
-                logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
-                domain = utils.deobfuscate(domain, block["blocker"])
-                logger.debug("domain='%s' - AFTER!", domain)
-
-            if not validators.domain(domain):
-                logger.debug("domain='%s' is not a valid domain - SKIPPED!")
-                continue
-            elif blacklist.is_blacklisted(domain):
-                logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
-                continue
-            elif blocks.is_instance_blocked(block["blocker"], domain, severity):
-                logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
-                continue
-
-            logger.debug("Marking domain='%s' as handled", domain)
-            domains.append(domain)
-
-            logger.debug("Processing domain='%s' ...", domain)
-            processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
-            logger.debug("processed='%s'", processed)
-
-            if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
-                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
-                blockdict.append({
-                    "blocked": domain,
-                    "reason" : block["reason"],
-                })
-
-            if reject_media:
-                processing.block(block["blocker"], domain, None, "reject_media")
-            if reject_reports:
-                processing.block(block["blocker"], domain, None, "reject_reports")
-
-        logger.debug("block[blocker]='%s'", block["blocker"])
-        if block["blocker"] != "chaos.social":
-            logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
-            instances.set_total_blocks(block["blocker"], domains)
-
-        logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
-        if instances.has_pending(block["blocker"]):
-            logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
-            instances.update_data(block["blocker"])
-
-        logger.debug("Invoking commit() ...")
-        database.connection.commit()
-
-        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
-        if config.get("bot_enabled") and len(blockdict) > 0:
-            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", block["blocker"], len(blockdict))
-            network.send_bot_post(block["blocker"], blockdict)
+        logger.debug("Invoking processing.csv_block(%s, %s, fetch_oliphant) ...", block["blocker"], url)
+        processing.csv_block(block["blocker"], url, inspect.currentframe().f_code.co_name)
 
     logger.debug("Success! - EXIT!")
     return 0
@@ -1166,30 +1069,24 @@ def fetch_txt(args: argparse.Namespace) -> int:
     logger.debug("Invoking locking.acquire() ...")
     locking.acquire()
 
-    # Static URLs
-    urls = ({
-        "blocker": "seirdy.one",
-        "url"    : "https://seirdy.one/pb/bsl.txt",
-    },)
-
-    logger.info("Checking %d text file(s) ...", len(urls))
-    for row in urls:
+    logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
+    for row in blocklists.txt_files:
         logger.debug("Fetching row[url]='%s' ...", row["url"])
         response = utils.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-        if response.ok and response.status_code < 300 and response.text != "":
+        if response.ok and response.status_code == 200 and response.text != "":
             logger.debug("Returned %d Bytes for processing", len(response.text.strip()))
-            domains = response.text.split("\n")
+            domains = response.text.strip().split("\n")
 
             logger.info("Processing %d domains ...", len(domains))
             for domain in domains:
                 logger.debug("domain='%s' - BEFORE!", domain)
-                domain = tidyup.domain(domain)
+                domain = tidyup.domain(domain) if domain not in [None, ""] else None
 
                 logger.debug("domain='%s' - AFTER!", domain)
-                if domain == "":
-                    logger.debug("domain is empty - SKIPPED!")
+                if domain in [None, ""]:
+                    logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                     continue
                 elif not domain_helper.is_wanted(domain):
                     logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
@@ -1199,7 +1096,7 @@ def fetch_txt(args: argparse.Namespace) -> int:
                     continue
 
                 logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
-                processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
+                processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)
                 logger.debug("processed='%s'", processed)
 
                 if not processed:
@@ -1218,7 +1115,7 @@ def fetch_fedipact(args: argparse.Namespace) -> int:
     source_domain = "fedipact.online"
     if sources.is_recent(source_domain):
         logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
-        return 0
+        return 1
     else:
         logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
         sources.update(source_domain)
@@ -1231,7 +1128,7 @@ def fetch_fedipact(args: argparse.Namespace) -> int:
     )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code < 300 and response.text != "":
+    if response.ok and response.status_code == 200 and response.text != "":
         logger.debug("Parsing %d Bytes ...", len(response.text))
 
         doc = bs4.BeautifulSoup(response.text, "html.parser")
@@ -1241,11 +1138,11 @@ def fetch_fedipact(args: argparse.Namespace) -> int:
         logger.info("Checking %d row(s) ...", len(rows))
         for row in rows:
             logger.debug("row[]='%s'", type(row))
-            domain = tidyup.domain(row.contents[0])
+            domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None
 
             logger.debug("domain='%s' - AFTER!", domain)
-            if domain == "":
-                logger.debug("domain is empty - SKIPPED!")
+            if domain in [None, ""]:
+                logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
                 continue
 
             logger.debug("domain='%s' - BEFORE!", domain)
@@ -1277,7 +1174,7 @@ def fetch_joinmobilizon(args: argparse.Namespace) -> int:
source_domain = "instances.joinmobilizon.org" if sources.is_recent(source_domain): logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain) - return 0 + return 1 else: logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain) sources.update(source_domain) @@ -1325,7 +1222,7 @@ def fetch_joinmisskey(args: argparse.Namespace) -> int: source_domain = "instanceapp.misskey.page" if sources.is_recent(source_domain): logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain) - return 0 + return 1 else: logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain) sources.update(source_domain) @@ -1364,185 +1261,6 @@ def fetch_joinmisskey(args: argparse.Namespace) -> int: logger.debug("Success! - EXIT!") return 0 -def fetch_joinfediverse(args: argparse.Namespace) -> int: - logger.debug("args[]='%s' - CALLED!", type(args)) - - logger.debug("Invoking locking.acquire() ...") - locking.acquire() - - source_domain = "joinfediverse.wiki" - if sources.is_recent(source_domain): - logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain) - return 0 - else: - logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain) - sources.update(source_domain) - - logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain) - raw = utils.fetch_url( - f"https://{source_domain}/FediBlock", - network.web_headers, - (config.get("connection_timeout"), config.get("read_timeout")) - ).text - logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw)) - - doc = bs4.BeautifulSoup(raw, "html.parser") - logger.debug("doc[]='%s'", type(doc)) - - tables = doc.findAll("table", {"class": "wikitable"}) - - logger.info("Analyzing %d table(s) ...", len(tables)) - blocklist = list() - for table in tables: - logger.debug("table[]='%s'", type(table)) - - rows = table.findAll("tr") - logger.info("Checking %d row(s) ...", len(rows)) - block_headers = dict() - for row in rows: - logger.debug("row[%s]='%s'", type(row), row) - - headers = row.findAll("th") - logger.debug("Found headers()=%d header(s)", len(headers)) - if len(headers) > 1: - block_headers = dict() - cnt = 0 - for header in headers: - cnt = cnt + 1 - logger.debug("header[]='%s',cnt=%d", type(header), cnt) - text = header.contents[0] - - logger.debug("text[]='%s'", type(text)) - if not isinstance(text, str): - logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text)) - continue - elif validators.domain(text.strip()): - logger.debug("text='%s' is a domain - SKIPPED!", text.strip()) - continue - - text = tidyup.domain(text.strip()) - logger.debug("text='%s' - AFTER!", text) - if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]: - logger.debug("Found header: '%s'=%d", text, cnt) - block_headers[cnt] = text - - elif len(block_headers) == 0: - logger.debug("row is not scrapable - SKIPPED!") - continue - elif len(block_headers) > 0: - logger.debug("Found a row with %d scrapable headers ...", len(block_headers)) - cnt = 0 - block = dict() - - for element in row.find_all(["th", "td"]): - cnt = cnt + 1 - logger.debug("element[]='%s',cnt=%d", type(element), cnt) - if cnt in block_headers: - logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt]) - - text = element.text.strip() - key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked" - - logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, 
key, type(text), text) - if key in ["domain", "instance"]: - block[key] = text - elif key == "reason": - block[key] = tidyup.reason(text) - elif key == "subdomain(s)": - block[key] = list() - if text != "": - block[key] = text.split("/") - else: - logger.debug("key='%s'", key) - block[key] = text - - logger.debug("block()=%d ...", len(block)) - if len(block) > 0: - logger.debug("Appending block()=%d ...", len(block)) - blocklist.append(block) - - logger.debug("blocklist()=%d", len(blocklist)) - - database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'") - domains = database.cursor.fetchall() - - logger.debug("domains(%d)[]='%s'", len(domains), type(domains)) - blocking = list() - for block in blocklist: - logger.debug("block='%s'", block) - if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0: - origin = block["blocked"] - logger.debug("origin='%s'", origin) - for subdomain in block["subdomain(s)"]: - block["blocked"] = subdomain + "." + origin - logger.debug("block[blocked]='%s'", block["blocked"]) - blocking.append(block) - else: - blocking.append(block) - - logger.debug("blocking()=%d", blocking) - for block in blocking: - logger.debug("block[]='%s'", type(block)) - if "blocked" not in block: - raise KeyError(f"block()={len(block)} does not have element 'blocked'") - - block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8") - logger.debug("block[blocked]='%s' - AFTER!", block["blocked"]) - - if block["blocked"] == "": - logger.debug("block[blocked] is empty - SKIPPED!") - continue - elif not domain_helper.is_wanted(block["blocked"]): - logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"]) - continue - elif instances.is_recent(block["blocked"]): - logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"]) - continue - - logger.debug("Proccessing blocked='%s' ...", block["blocked"]) - processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name) - - blockdict = list() - for blocker in domains: - blocker = blocker[0] - logger.debug("blocker[%s]='%s'", type(blocker), blocker) - instances.set_last_blocked(blocker) - - for block in blocking: - logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None) - block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None - - logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"]) - if block["blocked"] == "": - logger.debug("block[blocked] is empty - SKIPPED!") - continue - elif not domain_helper.is_wanted(block["blocked"]): - logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"]) - continue - - logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"]) - if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"): - logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker) - blockdict.append({ - "blocked": block["blocked"], - "reason" : block["reason"], - }) - - if instances.has_pending(blocker): - logger.debug("Flushing updates for blocker='%s' ...", blocker) - instances.update_data(blocker) - - logger.debug("Invoking commit() ...") - database.connection.commit() - - logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict)) - if 
config.get("bot_enabled") and len(blockdict) > 0: - logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict)) - network.send_bot_post(blocker, blockdict) - - logger.debug("Success! - EXIT!") - return 0 - def recheck_obfuscation(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) @@ -1550,11 +1268,11 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: locking.acquire() if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain): - database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain]) + database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain]) elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software: - database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software]) + database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software]) else: - database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1") + database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL") rows = database.cursor.fetchall() logger.info("Checking %d domains ...", len(rows)) @@ -1563,29 +1281,37 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"): logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force)) continue + elif blacklist.is_blacklisted(row["domain"]): + logger.warning("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"]) + continue - blocking = list() - if row["software"] == "pleroma": - logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) - blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"]) - elif row["software"] == "mastodon": - logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) - blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"]) - elif row["software"] == "lemmy": - logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) - blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"]) - elif row["software"] == "friendica": - logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) - blocking = friendica.fetch_blocks(row["domain"]) - elif row["software"] == "misskey": - logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) - blocking = misskey.fetch_blocks(row["domain"]) - else: - logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"]) + logger.debug("Invoking federation.fetch_blocks(%s) ...", row["domain"]) + blocking = federation.fetch_blocks(row["domain"]) + + logger.debug("blocking()=%d", len(blocking)) + if len(blocking) == 0: + logger.debug("Empty blocking list, trying individual fetch_blocks() for row[software]='%s' ...", row["software"]) + if row["software"] == "pleroma": + logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) + blocking = 
pleroma.fetch_blocks(row["domain"]) + elif row["software"] == "mastodon": + logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) + blocking = mastodon.fetch_blocks(row["domain"]) + elif row["software"] == "lemmy": + logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) + blocking = lemmy.fetch_blocks(row["domain"]) + elif row["software"] == "friendica": + logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) + blocking = friendica.fetch_blocks(row["domain"]) + elif row["software"] == "misskey": + logger.debug("domain='%s',software='%s'", row["domain"], row["software"]) + blocking = misskey.fetch_blocks(row["domain"]) + else: + logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"]) + # c.s isn't part of oliphant's "hidden" blocklists logger.debug("row[domain]='%s'", row["domain"]) - # chaos.social requires special care ... - if row["domain"] != "chaos.social": + if row["domain"] != "chaos.social" and row["software"] is not None and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]): logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking)) instances.set_last_blocked(row["domain"]) instances.set_total_blocks(row["domain"], blocking) @@ -1601,19 +1327,22 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: if block["blocked"] == "": logger.debug("block[blocked] is empty - SKIPPED!") continue + elif block["blocked"].endswith(".onion"): + logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"]) + continue + elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") == "true": + logger.debug("blocked='%s' is an I2P onion domain name - SKIPPED!", block["blocked"]) + continue elif block["blocked"].endswith(".arpa"): logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"]) continue elif block["blocked"].endswith(".tld"): logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"]) continue - elif block["blocked"].endswith(".onion"): - logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"]) - continue elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0: logger.debug("block='%s' is obfuscated.", block["blocked"]) obfuscated = obfuscated + 1 - blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None) + blocked = utils.deobfuscate(block["blocked"], row["domain"], block["digest"] if "digest" in block else None) elif not domain_helper.is_wanted(block["blocked"]): logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"]) continue @@ -1626,12 +1355,15 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked) obfuscated = obfuscated - 1 - if blocks.is_instance_blocked(row["domain"], blocked): - logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"]) - continue - elif blacklist.is_blacklisted(blocked): + if blacklist.is_blacklisted(blocked): logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked) continue + elif blacklist.is_blacklisted(row["domain"]): + logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"]) + continue + elif blocks.is_instance_blocked(row["domain"], blocked): + logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"]) + 
continue block["block_level"] = blocks.alias_block_level(block["block_level"]) @@ -1643,17 +1375,14 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: "reason" : block["reason"], }) - logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"]) + logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"]) + instances.set_has_obfuscation(row["domain"], (obfuscated > 0)) instances.set_obfuscated_blocks(row["domain"], obfuscated) logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated) - if obfuscated == 0 and len(blocking) > 0: - logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"]) - instances.set_has_obfuscation(row["domain"], False) - if instances.has_pending(row["domain"]): logger.debug("Flushing updates for blocker='%s' ...", row["domain"]) - instances.update_data(row["domain"]) + instances.update(row["domain"]) logger.debug("Invoking commit() ...") database.connection.commit() @@ -1675,7 +1404,7 @@ def fetch_fedilist(args: argparse.Namespace) -> int: source_domain = "demo.fedilist.com" if sources.is_recent(source_domain): logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain) - return 0 + return 1 else: logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain) sources.update(source_domain) @@ -1694,7 +1423,7 @@ def fetch_fedilist(args: argparse.Namespace) -> int: ) logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) - if not response.ok or response.status_code >= 300 or len(response.content) == 0: + if not response.ok or response.status_code > 200 or len(response.content) == 0: logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text)) return 1 @@ -1715,11 +1444,11 @@ def fetch_fedilist(args: argparse.Namespace) -> int: continue logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"]) - domain = tidyup.domain(row["hostname"]) + domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"]) + if domain in [None, ""]: + logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"]) continue logger.debug("domain='%s' - BEFORE!", domain) @@ -1750,13 +1479,28 @@ def update_nodeinfo(args: argparse.Namespace) -> int: if args.domain is not None and args.domain != "": logger.debug("Fetching args.domain='%s'", args.domain) - database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain]) + database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain]) elif args.software is not None and args.software != "": logger.info("Fetching domains for args.software='%s'", args.software) - database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software]) + database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? 
ORDER BY last_updated ASC", [args.software]) + elif args.mode is not None and args.mode != "": + logger.info("Fetching domains for args.mode='%s'", args.mode.upper()) + database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode]) + elif args.no_software: + logger.info("Fetching domains with no software type detected ...") + database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC") + elif args.with_software: + logger.info("Fetching domains with any software type detected ...") + database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NOT NULL ORDER BY last_updated ASC") + elif args.no_auto: + logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...") + database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC") + elif args.no_detection: + logger.info("Fetching domains with no detection mode being set ...") + database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NULL ORDER BY last_updated ASC") else: logger.info("Fetching domains for recently updated ...") - database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")]) + database.cursor.execute("SELECT domain, software FROM instances ORDER BY last_updated ASC") domains = database.cursor.fetchall() @@ -1767,6 +1511,9 @@ def update_nodeinfo(args: argparse.Namespace) -> int: if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"): logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"]) continue + elif blacklist.is_blacklisted(row["domain"]): + logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"]) + continue try: logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100)) @@ -1774,6 +1521,11 @@ def update_nodeinfo(args: argparse.Namespace) -> int: logger.debug("Determined software='%s'", software) if (software != row["software"] and software is not None) or args.force is True: + logger.debug("software='%s'", software) + if software is None: + logger.debug("Setting nodeinfo_url to 'None' for row[domain]='%s' ...", row["domain"]) + instances.set_nodeinfo_url(row["domain"], None) + logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software) instances.set_software(row["domain"], software) @@ -1785,7 +1537,7 @@ def update_nodeinfo(args: argparse.Namespace) -> int: instances.set_last_error(row["domain"], exception) instances.set_last_nodeinfo(row["domain"]) - instances.update_data(row["domain"]) + instances.update(row["domain"]) cnt = cnt + 1 logger.debug("Success! 
- EXIT!") @@ -1804,7 +1556,7 @@ def fetch_instances_social(args: argparse.Namespace) -> int: return 1 elif sources.is_recent(source_domain): logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain) - return 0 + return 2 else: logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain) sources.update(source_domain) @@ -1817,10 +1569,10 @@ def fetch_instances_social(args: argparse.Namespace) -> int: fetched = network.get_json_api( source_domain, "/api/1.0/instances/list?count=0&sort_by=name", - headers, - (config.get("connection_timeout"), config.get("read_timeout")) + headers=headers, + timeout=(config.get("connection_timeout"), config.get("read_timeout")) ) - logger.debug("fetched[]='%s'", type(fetched)) + logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched)) if "error_message" in fetched: logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"]) @@ -1841,11 +1593,80 @@ def fetch_instances_social(args: argparse.Namespace) -> int: logger.info("Checking %d row(s) ...", len(rows)) for row in rows: logger.debug("row[]='%s'", type(row)) - domain = tidyup.domain(row["name"]) + domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None + logger.debug("domain='%s' - AFTER!", domain) + + if domain is None and domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain) + continue + + logger.debug("domain='%s' - BEFORE!", domain) + domain = domain.encode("idna").decode("utf-8") + logger.debug("domain='%s' - AFTER!", domain) + + if not domain_helper.is_wanted(domain): + logger.debug("domain='%s' is not wanted - SKIPPED!", domain) + continue + elif domain in domains: + logger.debug("domain='%s' is already added - SKIPPED!", domain) + continue + elif instances.is_registered(domain): + logger.debug("domain='%s' is already registered - SKIPPED!", domain) + continue + elif instances.is_recent(domain): + logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) + continue + + logger.info("Fetching instances from domain='%s'", domain) + federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) + + logger.debug("Success! 
- EXIT!") + return 0 + +def fetch_relaylist(args: argparse.Namespace) -> int: + logger.debug("args[]='%s' - CALLED!", type(args)) + + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + + source_domain = "api.relaylist.com" + + if sources.is_recent(source_domain): + logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain) + return 1 + else: + logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain) + sources.update(source_domain) + + logger.info("Fetching list from source_domain='%s' ...", source_domain) + fetched = network.get_json_api( + source_domain, + "/relays", + {}, + (config.get("connection_timeout"), config.get("read_timeout")) + ) + logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched)) + + if "error_message" in fetched: + logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"]) + return 2 + elif "exception" in fetched: + logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"])) + return 3 + elif "json" not in fetched: + logger.warning("fetched has no element 'json' - EXIT!") + return 4 + + domains = list() + + logger.info("Checking %d row(s) ...", len(fetched["json"])) + for row in fetched["json"]: + logger.debug("row[]='%s'", type(row)) + domain = urlparse(row["url"]).netloc.lower().split(":")[0] logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("domain is empty - SKIPPED!") + if domain is None and domain == "": + logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain) continue logger.debug("domain='%s' - BEFORE!", domain) @@ -1878,9 +1699,11 @@ def fetch_relays(args: argparse.Namespace) -> int: locking.acquire() if args.domain is not None and args.domain != "": - database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain]) + database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? 
LIMIT 1", [args.domain]) + elif args.software is not None and args.software != "": + database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software]) else: - database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')") + database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')") domains = list() rows = database.cursor.fetchall() @@ -1888,29 +1711,57 @@ def fetch_relays(args: argparse.Namespace) -> int: logger.info("Checking %d relays ...", len(rows)) for row in rows: logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"]) - peers = list() if not args.force and instances.is_recent(row["domain"]): logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"]) continue + peers = list() try: - logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"]) - raw = utils.fetch_url( - f"https://{row['domain']}", - network.web_headers, - (config.get("connection_timeout"), config.get("read_timeout")) - ).text - logger.debug("raw[%s]()=%d", type(raw), len(raw)) + if row["software"] == "pub-relay": + logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"]) + raw = network.fetch_api_url( + row["nodeinfo_url"], + (config.get("connection_timeout"), config.get("read_timeout")) + ) + + logger.debug("raw[%s]()=%d", type(raw), len(raw)) + if "exception" in raw: + logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"])) + raise raw["exception"] + elif "error_message" in raw: + logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"]) + instances.set_last_error(row["domain"], raw) + instances.set_last_instance_fetch(row["domain"]) + instances.update(row["domain"]) + continue + elif "json" not in raw: + logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw)) + continue + elif not "metadata" in raw["json"]: + logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"])) + continue + elif not "peers" in raw["json"]["metadata"]: + logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"])) + continue + else: + logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"]) + raw = utils.fetch_url( + f"https://{row['domain']}", + network.web_headers, + (config.get("connection_timeout"), config.get("read_timeout")) + ).text + logger.debug("raw[%s]()=%d", type(raw), len(raw)) + + doc = bs4.BeautifulSoup(raw, features="html.parser") + logger.debug("doc[]='%s'", type(doc)) + except network.exceptions as exception: logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception)) instances.set_last_error(row["domain"], exception) instances.set_last_instance_fetch(row["domain"]) - instances.update_data(row["domain"]) + instances.update(row["domain"]) continue - doc = bs4.BeautifulSoup(raw, features="html.parser") - logger.debug("doc[]='%s'", type(doc)) - 
logger.debug("row[software]='%s'", row["software"]) if row["software"] == "activityrelay": logger.debug("Checking row[domain]='%s' ...", row["domain"]) @@ -1939,16 +1790,17 @@ def fetch_relays(args: argparse.Namespace) -> int: continue logger.debug("domain='%s' - BEFORE!", domain) - domain = tidyup.domain(domain) + domain = tidyup.domain(domain) if domain not in[None, ""] else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"]) + if domain in [None, ""]: + logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"]) continue elif domain not in peers: logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"]) peers.append(domain) + logger.debug("domains()=%d,domain='%s'", len(domains), domain) if dict_helper.has_key(domains, "domain", domain): logger.debug("domain='%s' already added", domain) continue @@ -1971,39 +1823,62 @@ def fetch_relays(args: argparse.Namespace) -> int: link = tag.find("a") logger.debug("link[%s]='%s'", type(link), link) - if link is None: - logger.warning("tag='%s' has no a-tag ...", tag) + if not isinstance(link, bs4.element.Tag): + logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag) continue - components = urlparse(link["href"]) - domain = components.netloc.lower() + components = urlparse(link.get("href")) + logger.debug("components(%d)='%s'", len(components), components) + domain = components.netloc.lower().split(":")[0] - if not domain_helper.is_wanted(domain): - logger.debug("domain='%s' is not wanted - SKIPPED!", domain) + logger.debug("domain='%s' - BEFORE!", domain) + domain = tidyup.domain(domain) if domain not in[None, ""] else None + logger.debug("domain='%s' - AFTER!", domain) + + if domain in [None, ""]: + logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"]) + continue + elif domain not in peers: + logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"]) + peers.append(domain) + + logger.debug("domains()=%d,domain='%s'", len(domains), domain) + if dict_helper.has_key(domains, "domain", domain): + logger.debug("domain='%s' already added", domain) continue + logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"]) + domains.append({ + "domain": domain, + "origin": row["domain"], + }) + elif row["software"] == "pub-relay": + logger.debug("Checking %d peer(s) row[domain]='%s' ...", len(raw["json"]["metadata"]["peers"]), row["domain"]) + for domain in raw["json"]["metadata"]["peers"]: logger.debug("domain='%s' - BEFORE!", domain) - domain = tidyup.domain(domain) + domain = tidyup.domain(domain) if domain not in[None, ""] else None logger.debug("domain='%s' - AFTER!", domain) - if domain == "": - logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"]) + if domain in [None, ""]: + logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"]) continue elif domain not in peers: logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"]) peers.append(domain) + logger.debug("domains()=%d,domain='%s'", len(domains), domain) if dict_helper.has_key(domains, "domain", domain): logger.debug("domain='%s' already added", domain) continue - logger.debug("Appending 
-                    logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
+                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
                    domains.append({
                        "domain": domain,
                        "origin": row["domain"],
                    })
        else:
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
+            continue

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])

@@ -2012,12 +1887,15 @@ def fetch_relays(args: argparse.Namespace) -> int:
        instances.set_total_peers(row["domain"], peers)

        logger.debug("Flushing data for row[domain]='%s'", row["domain"])
-        instances.update_data(row["domain"])
+        instances.update(row["domain"])

    logger.info("Checking %d domains ...", len(domains))
    for row in domains:
        logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
-        if instances.is_registered(row["domain"]):
+        if not domain_helper.is_wanted(row["domain"]):
+            logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
+            continue
+        elif instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
            continue

@@ -2056,3 +1934,29 @@ def convert_idna(args: argparse.Namespace) -> int:
    logger.debug("Success! - EXIT!")
    return 0
+
+def remove_invalid(args: argparse.Namespace) -> int:
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
+    rows = database.cursor.fetchall()
+
+    logger.info("Checking %d domains ...", len(rows))
+    for row in rows:
+        logger.debug("row[domain]='%s'", row["domain"])
+        if not validators.domain(row["domain"].split("/")[0]):
+            logger.info("Invalid row[domain]='%s' found, removing ...", row["domain"])
+            database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [row["domain"], row["domain"]])
+            database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [row["domain"]])
+
+    logger.debug("Invoking commit() ...")
+    database.connection.commit()
+
+    logger.info("Vacuuming database ...")
+    database.cursor.execute("VACUUM")
+
+    logger.debug("Success! - EXIT!")
+    return 0
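
remove_invalid() keys its decision on `validators.domain(row["domain"].split("/")[0])`, i.e. only the host part before any stray path is validated. A quick sketch of what would be kept and removed (edge-case behaviour of the third-party validators package is an assumption):

    import validators

    # Same check as remove_invalid() above, applied to sample rows.
    for candidate in ["example.com", "example.com/stray/path", "not a domain", "foo..bar"]:
        host = candidate.split("/")[0]
        verdict = "keep" if validators.domain(host) else "remove"
        print(f"{verdict}: {candidate!r}")

    # Expected: keep 'example.com' and 'example.com/stray/path' (the host
    # part is still valid); remove 'not a domain' and 'foo..bar'.

Note that `DELETE ... LIMIT 1` only parses on SQLite builds compiled with SQLITE_ENABLE_UPDATE_DELETE_LIMIT; on other builds the statement raises an OperationalError.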