From 1a44daf9f016420fc6a7b0761f734a5d93b9d2f5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sun, 25 Jun 2023 20:07:43 +0200 Subject: [PATCH] Continued: - introduced utils.process_block() to encapsulate handling adding/updating blocks --- fba/commands.py | 86 +++++++++++++++++++---------------------- fba/models/instances.py | 2 +- fba/utils.py | 63 +++++++++++++++--------------- 3 files changed, 71 insertions(+), 80 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index 4923bf5..c4b1d0a 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -334,21 +334,7 @@ def fetch_blocks(args: argparse.Namespace) -> int: logger.debug("Block level 'suspend' has been changed to 'suspended'") block['block_level'] = "suspended" - if not blocks.is_instance_blocked(blocker, block["blocked"], block['block_level']): - logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block["blocked"], block["reason"], block['block_level']) - blocks.add_instance(blocker, block["blocked"], block["reason"], block['block_level']) - - logger.debug("block_level='%s',config[bot_enabled]='%s'", block['block_level'], config.get("bot_enabled")) - if block['block_level'] == "reject" and config.get("bot_enabled"): - logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block["blocked"], block["reason"]) - blockdict.append({ - "blocked": block["blocked"], - "reason" : block["reason"], - }) - else: - logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block["blocked"]) - blocks.update_last_seen(blocker, block["blocked"], block['block_level']) - blocks.update_reason(block["reason"], blocker, block["blocked"], block['block_level']) + utils.process_block(blocker, block['blocked'], block['reason'], block['block_level']) logger.debug("Invoking cookies.clear(%s) ...", block["blocked"]) cookies.clear(block["blocked"]) @@ -568,9 +554,7 @@ def fetch_cs(args: argparse.Namespace): logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"]) instances.set_last_error(row["domain"], exception) - if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level): - logger.debug("domain='%s',block_level='%s' blocked by chaos.social, adding ...", row["domain"], block_level) - blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level) + utils.process_block('chaos.social', row['domain'], row['reason'], block_level) logger.debug("Invoking commit() ...") database.connection.commit() @@ -799,7 +783,8 @@ def fetch_oliphant(args: argparse.Namespace) -> int: logger.debug("reader[]='%s'", type(reader)) for row in reader: logger.debug("row[%s]='%s'", type(row), row) - domain = None + domain = severity = None + reject_media = reject_reports = False if "#domain" in row: domain = row["#domain"] elif "domain" in row: @@ -808,22 +793,42 @@ def fetch_oliphant(args: argparse.Namespace) -> int: logger.debug("row='%s' does not contain domain column", row) continue - logger.debug("domain='%s'", domain) + if "#severity" in row: + severity = row["#severity"] + elif "severity" in row: + severity = row["severity"] + else: + logger.debug("row='%s' does not contain severity column", row) + continue + + if "#reject_media" in row and row["#reject_media"].lower() == "true": + reject_media = True + elif "reject_media" in row and row["reject_media"].lower() == "true": + reject_media = True + + if "#reject_reports" in row and row["#reject_reports"].lower() == "true": + reject_reports = True + elif "reject_reports" in row and row["reject_reports"].lower() == "true": + reject_reports = True + + logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports) if not utils.is_domain_wanted(domain): logger.debug("domain='%s' is not wanted - SKIPPED!", domain) continue - elif instances.is_recent(domain): - logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain) - continue logger.debug("Marking domain='%s' as handled", domain) domains.append(domain) logger.debug("Processing domain='%s' ...", domain) processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name) - logger.debug("processed='%s'", processed) + utils.process_block(block['blocker'], domain, None, "reject") + if reject_media: + utils.process_block(block['blocker'], domain, None, "reject_media") + if reject_reports: + utils.process_block(block['blocker'], domain, None, "reject_reports") + logger.debug("Success! - EXIT!") return 0 @@ -931,33 +936,34 @@ def fetch_joinfediverse(args: argparse.Namespace) -> int: logger.info("Checking %d row(s) ...", len(rows)) block_headers = dict() for row in rows: - #logger.debug("row[%s]='%s'", type(row), row) + logger.debug("row[%s]='%s'", type(row), row) headers = row.findAll("th") - #logger.debug("Found headers()=%d header(s)", len(headers)) + logger.debug("Found headers()=%d header(s)", len(headers)) if len(headers) > 1: block_headers = dict() cnt = 0 for header in headers: cnt = cnt + 1 - #logger.debug("header[]='%s',cnt=%d", type(header), cnt) + logger.debug("header[]='%s',cnt=%d", type(header), cnt) text = header.contents[0] - #logger.debug("text[]='%s'", type(text)) + logger.debug("text[]='%s'", type(text)) if not isinstance(text, str): - #logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text)) + logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text)) continue elif validators.domain(text.strip()): - #logger.debug("text='%s' is a domain - SKIPPED!", text.strip()) + logger.debug("text='%s' is a domain - SKIPPED!", text.strip()) continue text = tidyup.domain(text.strip()) - #logger.debug("text='%s'", text) + logger.debug("text='%s'", text) if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]: logger.debug("Found header: '%s'=%d", text, cnt) block_headers[cnt] = text + elif len(block_headers) == 0: - #logger.debug("row is not scrapable - SKIPPED!") + logger.debug("row is not scrapable - SKIPPED!") continue elif len(block_headers) > 0: logger.debug("Found a row with %d scrapable headers ...", len(block_headers)) @@ -1035,21 +1041,7 @@ def fetch_joinfediverse(args: argparse.Namespace) -> int: continue logger.debug("blocked='%s',reason='%s'", block['blocked'], block['reason']) - if not blocks.is_instance_blocked(blocker, block['blocked'], "reject"): - logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block['blocked'], block["reason"], "reject") - blocks.add_instance(blocker, block['blocked'], block["reason"], "reject") - - logger.debug("block_level='%s',config[bot_enabled]='%s'", "reject", config.get("bot_enabled")) - if config.get("bot_enabled"): - logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block['blocked'], block["reason"]) - blockdict.append({ - "blocked": block['blocked'], - "reason" : block["reason"], - }) - else: - logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block['blocked']) - blocks.update_last_seen(blocker, block['blocked'], "reject") - blocks.update_reason(block["reason"], blocker, block['blocked'], "reject") + utils.process_block(blocker, block['blocked'], block['reason'], "reject") if instances.has_pending(blocker): logger.debug("Flushing updates for blocker='%s' ...", blocker) diff --git a/fba/models/instances.py b/fba/models/instances.py index 320a24f..f113bee 100644 --- a/fba/models/instances.py +++ b/fba/models/instances.py @@ -283,7 +283,7 @@ def is_registered(domain: str) -> bool: return registered def is_recent(domain: str, column: str = "last_instance_fetch") -> bool: - logger.debug("domain='%s',column='%s' - CALLED!", domain) + logger.debug("domain='%s',column='%s' - CALLED!", domain, column) domain_helper.raise_on(domain) if not isinstance(column, str): diff --git a/fba/utils.py b/fba/utils.py index c62de4b..c7f35d5 100644 --- a/fba/utils.py +++ b/fba/utils.py @@ -23,12 +23,14 @@ import requests import validators from fba.helpers import blacklist +from fba.helpers import config from fba.helpers import domain as domain_helper from fba.helpers import tidyup from fba.http import federation from fba.http import network +from fba.models import blocks from fba.models import instances logging.basicConfig(level=logging.INFO) @@ -81,37 +83,7 @@ def process_domain(domain: str, blocker: str, command: str) -> bool: raise ValueError("Parameter 'command' is empty") logger.debug("domain='%s' - BEFORE!") - if domain.find("*") > 0: - logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker) - instances.set_has_obfuscation(blocker, True) - - # Try to de-obscure it - row = instances.deobfuscate("*", domain) - - logger.debug("row[%s]='%s'", type(row), row) - if row is None: - logger.warning("Cannot de-obfuscate domain='%s' - SKIPPED!", domain) - return False - - logger.debug("domain='%s' de-obscured to '%s'", domain, row[0]) - domain = row[0] - elif domain.find("?") > 0: - logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker) - instances.set_has_obfuscation(blocker, True) - - # Try to de-obscure it - row = instances.deobfuscate("?", domain) - - logger.debug("row[%s]='%s'", type(row), row) - if row is None: - logger.warning("Cannot de-obfuscate domain='%s' - SKIPPED!", domain) - return False - - logger.debug("domain='%s' de-obscured to '%s'", domain, row[0]) - domain = row[0] - else: - logger.debug("blocker='%s' has NO obfuscation on their block list", blocker) - instances.set_has_obfuscation(blocker, False) + domain = deobfuscate_domain(domain, blocker) logger.debug("domain='%s' - DEOBFUSCATED!", domain) if instances.has_pending(blocker): @@ -131,7 +103,7 @@ def process_domain(domain: str, blocker: str, command: str) -> bool: federation.fetch_instances(domain, blocker, None, command) processed = True except network.exceptions as exception: - logger.warning("Exception '%s' during fetching instances (fetch_oliphant) from domain='%s'", type(exception), domain) + logger.warning("Exception '%s' during fetching instances (%s) from domain='%s'", type(exception), command, domain) instances.set_last_error(domain, exception) logger.debug("Checking if domain='%s' has pending updates ...") @@ -236,3 +208,30 @@ def deobfuscate_domain(domain: str, blocker: str) -> str: logger.debug("domain='%s' - EXIT!", domain) return domain + +def process_block(blocker: str, blocked: str, reason: str, block_level: str): + logger.debug("blocker='%s',blocked='%s',reason='%s',block_level='%s' - CALLED!", blocker, blocked, reason, block_level) + domain_helper.raise_on(blocker) + domain_helper.raise_on(blocked) + + if not isinstance(reason, str) and reason is not None: + raise ValueError("Parameter reason[]='%s' is not of type 'str'", type(reason)) + elif not isinstance(block_level, str): + raise ValueError("Parameter block_level[]='%s' is not of type 'str'", type(block_level)) + elif block_level == "": + raise ValueError("Parameter block_level is empty") + + if not blocks.is_instance_blocked(blocker, blocked, block_level): + logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, blocked, reason, block_level) + blocks.add_instance(blocker, blocked, reason, block_level) + + logger.debug("block_level='%s',config[bot_enabled]='%s'", block_level, config.get("bot_enabled")) + if config.get("bot_enabled"): + logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, blocked, reason) + blockdict.append({ + "blocked": blocked, + "reason" : reason, + }) + else: + logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, blocked) + blocks.update_last_seen(blocker, blocked, block_level) -- 2.39.5