X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fba%2Fcommands.py;h=1ecf9e2b30d8cd5e9f73fb3f40248d32a8e19312;hb=ae5597c66caace7a3f8d053503c80d92a5afaeea;hp=a58440b77dff126cf6b4978f13530c699780877d;hpb=d17192b8a95e9d57b1e9d9feb23b618ea2f683c0;p=fba.git diff --git a/fba/commands.py b/fba/commands.py index a58440b..1ecf9e2 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -316,7 +316,6 @@ def fetch_blocks(args: argparse.Namespace) -> int: instances.set_has_obfuscation(blocker, False) blocking = list() - blockdict = list() if software == "pleroma": logger.info("blocker='%s',software='%s'", blocker, software) blocking = pleroma.fetch_blocks(blocker, nodeinfo_url) @@ -911,7 +910,7 @@ def fetch_instances(args: argparse.Namespace) -> int: # Loop through some instances database.cursor.execute( - "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")] + "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")] ) rows = database.cursor.fetchall() @@ -1291,7 +1290,7 @@ def fetch_joinfediverse(args: argparse.Namespace) -> int: continue text = tidyup.domain(text.strip()) - logger.debug("text='%s'", text) + logger.debug("text='%s' - AFTER!", text) if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]: logger.debug("Found header: '%s'=%d", text, cnt) block_headers[cnt] = text @@ -1353,17 +1352,20 @@ def fetch_joinfediverse(args: argparse.Namespace) -> int: logger.debug("blocking()=%d", blocking) for block in blocking: logger.debug("block[]='%s'", type(block)) - block["blocked"] = tidyup.domain(block["blocked"]) + if "blocked" not in block: + raise KeyError(f"block()={len(block)} does not have element 'blocked'") + block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8") logger.debug("block[blocked]='%s' - AFTER!", block["blocked"]) + if block["blocked"] == "": logger.debug("block[blocked] is empty - SKIPPED!") continue elif not utils.is_domain_wanted(block["blocked"]): - logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"]) + logger.warning("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"]) continue elif instances.is_recent(block["blocked"]): - logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"]) + logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"]) continue logger.info("Proccessing blocked='%s' ...", block["blocked"]) @@ -1454,9 +1456,10 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking)) instances.set_total_blocks(row["domain"], blocking) - logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"]) obfuscated = 0 blockdict = list() + + logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"]) for block in blocking: logger.debug("block[blocked]='%s'", block["blocked"]) blocked = None @@ -1551,15 +1554,19 @@ def fetch_fedilist(args: argparse.Namespace) -> int: logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) if not response.ok or response.status_code >= 300 or len(response.content) == 0: - logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", response.ok, response.status_code, len(response.text)) + logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text)) return 1 reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix") logger.debug("reader[]='%s'", type(reader)) - blockdict = list() for row in reader: logger.debug("row[]='%s'", type(row)) + if "hostname" not in row: + logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row)) + continue + + logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"]) domain = tidyup.domain(row["hostname"]) logger.debug("domain='%s' - AFTER!", domain) @@ -1575,7 +1582,7 @@ def fetch_fedilist(args: argparse.Namespace) -> int: logger.warning("domain='%s' is not wanted - SKIPPED!", domain) continue elif (args.all is None or not args.all) and instances.is_registered(domain): - logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", type(args.all)) + logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", domain, type(args.all)) continue elif instances.is_recent(domain): logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)