git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Sun, 25 Jun 2023 18:07:43 +0000 (20:07 +0200)
committer Roland Häder <roland@mxchange.org>
Sun, 25 Jun 2023 18:07:43 +0000 (20:07 +0200)
- introduced utils.process_block() to encapsulate handling adding/updating blocks

fba/commands.py
fba/models/instances.py
fba/utils.py

index 4923bf56828ab4e7c3fd963ef04a2679a251e663..c4b1d0a34d37300127f3adc9abd1e24e518fd585 100644 (file)
@@ -334,21 +334,7 @@ def fetch_blocks(args: argparse.Namespace) -> int:
                 logger.debug("Block level 'suspend' has been changed to 'suspended'")
                 block['block_level'] = "suspended"
 
-            if not blocks.is_instance_blocked(blocker, block["blocked"], block['block_level']):
-                logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block["blocked"], block["reason"], block['block_level'])
-                blocks.add_instance(blocker, block["blocked"], block["reason"], block['block_level'])
-
-                logger.debug("block_level='%s',config[bot_enabled]='%s'", block['block_level'], config.get("bot_enabled"))
-                if block['block_level'] == "reject" and config.get("bot_enabled"):
-                    logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block["blocked"], block["reason"])
-                    blockdict.append({
-                        "blocked": block["blocked"],
-                        "reason" : block["reason"],
-                    })
-            else:
-                logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block["blocked"])
-                blocks.update_last_seen(blocker, block["blocked"], block['block_level'])
-                blocks.update_reason(block["reason"], blocker, block["blocked"], block['block_level'])
+            utils.process_block(blocker, block['blocked'], block['reason'], block['block_level'])
 
             logger.debug("Invoking cookies.clear(%s) ...", block["blocked"])
             cookies.clear(block["blocked"])
@@ -568,9 +554,7 @@ def fetch_cs(args: argparse.Namespace):
                         logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                         instances.set_last_error(row["domain"], exception)
 
-                if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
-                    logger.debug("domain='%s',block_level='%s' blocked by chaos.social, adding ...", row["domain"], block_level)
-                    blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)
+                utils.process_block('chaos.social', row['domain'], row['reason'], block_level)
 
         logger.debug("Invoking commit() ...")
         database.connection.commit()
@@ -799,7 +783,8 @@ def fetch_oliphant(args: argparse.Namespace) -> int:
             logger.debug("reader[]='%s'", type(reader))
             for row in reader:
                 logger.debug("row[%s]='%s'", type(row), row)
-                domain = None
+                domain = severity = None
+                reject_media = reject_reports = False
                 if "#domain" in row:
                     domain = row["#domain"]
                 elif "domain" in row:
@@ -808,22 +793,42 @@ def fetch_oliphant(args: argparse.Namespace) -> int:
                     logger.debug("row='%s' does not contain domain column", row)
                     continue
 
-                logger.debug("domain='%s'", domain)
+                if "#severity" in row:
+                    severity = row["#severity"]
+                elif "severity" in row:
+                    severity = row["severity"]
+                else:
+                    logger.debug("row='%s' does not contain severity column", row)
+                    continue
+
+                if "#reject_media" in row and row["#reject_media"].lower() == "true":
+                    reject_media = True
+                elif "reject_media" in row and row["reject_media"].lower() == "true":
+                    reject_media = True
+
+                if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
+                    reject_reports = True
+                elif "reject_reports" in row and row["reject_reports"].lower() == "true":
+                    reject_reports = True
+
+                logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
                 if not utils.is_domain_wanted(domain):
                     logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                     continue
-                elif instances.is_recent(domain):
-                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
-                    continue
 
                 logger.debug("Marking domain='%s' as handled", domain)
                 domains.append(domain)
 
                 logger.debug("Processing domain='%s' ...", domain)
                 processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
-
                 logger.debug("processed='%s'", processed)
 
+                utils.process_block(block['blocker'], domain, None, "reject")
+                if reject_media:
+                    utils.process_block(block['blocker'], domain, None, "reject_media")
+                if reject_reports:
+                    utils.process_block(block['blocker'], domain, None, "reject_reports")
+
     logger.debug("Success! - EXIT!")
     return 0
 
@@ -931,33 +936,34 @@ def fetch_joinfediverse(args: argparse.Namespace) -> int:
         logger.info("Checking %d row(s) ...", len(rows))
         block_headers = dict()
         for row in rows:
-            #logger.debug("row[%s]='%s'", type(row), row)
+            logger.debug("row[%s]='%s'", type(row), row)
 
             headers = row.findAll("th")
-            #logger.debug("Found headers()=%d header(s)", len(headers))
+            logger.debug("Found headers()=%d header(s)", len(headers))
             if len(headers) > 1:
                 block_headers = dict()
                 cnt = 0
                 for header in headers:
                     cnt = cnt + 1
-                    #logger.debug("header[]='%s',cnt=%d", type(header), cnt)
+                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                     text = header.contents[0]
 
-                    #logger.debug("text[]='%s'", type(text))
+                    logger.debug("text[]='%s'", type(text))
                     if not isinstance(text, str):
-                        #logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
+                        logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
                         continue
                     elif validators.domain(text.strip()):
-                        #logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
+                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                         continue
 
                     text = tidyup.domain(text.strip())
-                    #logger.debug("text='%s'", text)
+                    logger.debug("text='%s'", text)
                     if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                         logger.debug("Found header: '%s'=%d", text, cnt)
                         block_headers[cnt] = text
+
             elif len(block_headers) == 0:
-                #logger.debug("row is not scrapable - SKIPPED!")
+                logger.debug("row is not scrapable - SKIPPED!")
                 continue
             elif len(block_headers) > 0:
                 logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
@@ -1035,21 +1041,7 @@ def fetch_joinfediverse(args: argparse.Namespace) -> int:
                 continue
 
             logger.debug("blocked='%s',reason='%s'", block['blocked'], block['reason'])
-            if not blocks.is_instance_blocked(blocker, block['blocked'], "reject"):
-                logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, block['blocked'], block["reason"], "reject")
-                blocks.add_instance(blocker, block['blocked'], block["reason"], "reject")
-
-                logger.debug("block_level='%s',config[bot_enabled]='%s'", "reject", config.get("bot_enabled"))
-                if config.get("bot_enabled"):
-                    logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, block['blocked'], block["reason"])
-                    blockdict.append({
-                        "blocked": block['blocked'],
-                        "reason" : block["reason"],
-                    })
-            else:
-                logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, block['blocked'])
-                blocks.update_last_seen(blocker, block['blocked'], "reject")
-                blocks.update_reason(block["reason"], blocker, block['blocked'], "reject")
+            utils.process_block(blocker, block['blocked'], block['reason'], "reject")
 
         if instances.has_pending(blocker):
             logger.debug("Flushing updates for blocker='%s' ...", blocker)
index 320a24f69d90d639dc7fb29c5a99b85329aad928..f113beef563d9cf5652f00619bac2e1b600c57ba 100644 (file)
@@ -283,7 +283,7 @@ def is_registered(domain: str) -> bool:
     return registered
 
 def is_recent(domain: str, column: str = "last_instance_fetch") -> bool:
-    logger.debug("domain='%s',column='%s' - CALLED!", domain)
+    logger.debug("domain='%s',column='%s' - CALLED!", domain, column)
     domain_helper.raise_on(domain)
 
     if not isinstance(column, str):
index c62de4b5d9e72c119b1b8c476ef19b66f1baa9c7..c7f35d5b481852c92d34a9b20e041f5679945a51 100644 (file)
@@ -23,12 +23,14 @@ import requests
 import validators
 
 from fba.helpers import blacklist
+from fba.helpers import config
 from fba.helpers import domain as domain_helper
 from fba.helpers import tidyup
 
 from fba.http import federation
 from fba.http import network
 
+from fba.models import blocks
 from fba.models import instances
 
 logging.basicConfig(level=logging.INFO)
@@ -81,37 +83,7 @@ def process_domain(domain: str, blocker: str, command: str) -> bool:
         raise ValueError("Parameter 'command' is empty")
 
     logger.debug("domain='%s' - BEFORE!")
-    if domain.find("*") > 0:
-        logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
-        instances.set_has_obfuscation(blocker, True)
-
-        # Try to de-obscure it
-        row = instances.deobfuscate("*", domain)
-
-        logger.debug("row[%s]='%s'", type(row), row)
-        if row is None:
-            logger.warning("Cannot de-obfuscate domain='%s' - SKIPPED!", domain)
-            return False
-
-        logger.debug("domain='%s' de-obscured to '%s'", domain, row[0])
-        domain = row[0]
-    elif domain.find("?") > 0:
-        logger.debug("blocker='%s' uses obfuscated domains, marking ...", blocker)
-        instances.set_has_obfuscation(blocker, True)
-
-        # Try to de-obscure it
-        row = instances.deobfuscate("?", domain)
-
-        logger.debug("row[%s]='%s'", type(row), row)
-        if row is None:
-            logger.warning("Cannot de-obfuscate domain='%s' - SKIPPED!", domain)
-            return False
-
-        logger.debug("domain='%s' de-obscured to '%s'", domain, row[0])
-        domain = row[0]
-    else:
-        logger.debug("blocker='%s' has NO obfuscation on their block list", blocker)
-        instances.set_has_obfuscation(blocker, False)
+    domain = deobfuscate_domain(domain, blocker)
 
     logger.debug("domain='%s' - DEOBFUSCATED!", domain)
     if instances.has_pending(blocker):
@@ -131,7 +103,7 @@ def process_domain(domain: str, blocker: str, command: str) -> bool:
         federation.fetch_instances(domain, blocker, None, command)
         processed = True
     except network.exceptions as exception:
-        logger.warning("Exception '%s' during fetching instances (fetch_oliphant) from domain='%s'", type(exception), domain)
+        logger.warning("Exception '%s' during fetching instances (%s) from domain='%s'", type(exception), command, domain)
         instances.set_last_error(domain, exception)
 
     logger.debug("Checking if domain='%s' has pending updates ...")
@@ -236,3 +208,30 @@ def deobfuscate_domain(domain: str, blocker: str) -> str:
 
     logger.debug("domain='%s' - EXIT!", domain)
     return domain
+
+def process_block(blocker: str, blocked: str, reason: str, block_level: str):
+    logger.debug("blocker='%s',blocked='%s',reason='%s',block_level='%s' - CALLED!", blocker, blocked, reason, block_level)
+    domain_helper.raise_on(blocker)
+    domain_helper.raise_on(blocked)
+
+    if not isinstance(reason, str) and reason is not None:
+        raise ValueError("Parameter reason[]='%s' is not of type 'str'", type(reason))
+    elif not isinstance(block_level, str):
+        raise ValueError("Parameter block_level[]='%s' is not of type 'str'", type(block_level))
+    elif block_level == "":
+        raise ValueError("Parameter block_level is empty")
+
+    if not blocks.is_instance_blocked(blocker, blocked, block_level):
+        logger.debug("Invoking blocks.add_instance(%s, %s, %s, %s)", blocker, blocked, reason, block_level)
+        blocks.add_instance(blocker, blocked, reason, block_level)
+
+        logger.debug("block_level='%s',config[bot_enabled]='%s'", block_level, config.get("bot_enabled"))
+        if config.get("bot_enabled"):
+            logger.debug("blocker='%s' has blocked '%s' with reason='%s' - Adding to bot notification ...", blocker, blocked, reason)
+            blockdict.append({
+                "blocked": blocked,
+                "reason" : reason,
+            })
+    else:
+        logger.debug("Updating block last seen and reason for blocker='%s',blocked='%s' ...", blocker, blocked)
+        blocks.update_last_seen(blocker, blocked, block_level)