git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Mon, 4 Sep 2023 07:54:14 +0000 (09:54 +0200)
committer Roland Häder <roland@mxchange.org>
Mon, 4 Sep 2023 07:56:57 +0000 (09:56 +0200)
- functions in module fba.helpers.tidyup are relatively "expensive", meaning they
  need a lot of CPU cycles
- let's avoid invoking them on empty strings

fba/helpers/tidyup.py
fba/networks/friendica.py
fba/networks/lemmy.py
fba/networks/mastodon.py
fba/networks/pleroma.py

index 580cb225dcc0fbd307ca30c430a926afd4d69154..68abb8dffea12436d89f7399a680455342876869 100644 (file)
@@ -36,6 +36,8 @@ def domain(string: str) -> str:
 
     if not isinstance(string, str):
         raise ValueError(f"Parameter string[]='{type(string)}' is not of type 'str'")
+    elif string == "":
+        raise ValueError("Parameter string is empty")
 
     # All lower-case and strip spaces out + last dot
     string = string.lower().strip().rstrip(".")
index f30f1263492318af0007716460e3398d91c43d70..317a8caf37ac078482e4f92282bb51b9ad16b5a5 100644 (file)
@@ -78,12 +78,18 @@ def fetch_blocks(domain: str) -> list:
     logger.debug("Found rows()=%d", len(rows))
     for line in rows:
         logger.debug("line='%s'", line)
-        blocked = tidyup.domain(line.find_all("td")[0].text)
+        blocked = line.find_all("td")[0].text
+        logger.debug("blocked='%s'", blocked)
+
+        blocked = tidyup.domain(blocked) if blocked != "" else None
         reason  = tidyup.reason(line.find_all("td")[1].text)
-        logger.debug("blocked='%s',reason='%s'", blocked, reason)
+        logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
 
-        if blocked == "":
-            logger.debug("line[]='%s' returned empty blocked domain - SKIPPED!", type(line))
+        if blocked is None:
+            logger.warning("blocked is empty - SKIPPED!")
+            continue
+        elif blocked == "":
+            logger.warning("line[]='%s' returned empty blocked domain - SKIPPED!", type(line))
             continue
         elif not domain_helper.is_wanted(blocked):
             logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
@@ -92,8 +98,8 @@ def fetch_blocks(domain: str) -> list:
         logger.debug("Appending blocked='%s',reason='%s'", blocked, reason)
         blocklist.append({
             "blocker"    : domain,
-            "blocked"    : tidyup.domain(blocked),
-            "reason"     : tidyup.reason(reason),
+            "blocked"    : blocked,
+            "reason"     : reason,
             "block_level": "reject",
         })
 
index efc85de953927d325a9a76f6f84d14dde05752ff..e37c2b800d08a59f6f4685326b522b031522b29a 100644 (file)
@@ -200,10 +200,13 @@ def fetch_blocks(domain: str) -> list:
             logger.debug("Found %d blocked instance(s) ...", len(blocking))
             for tag in blocking:
                 logger.debug("tag[]='%s'", type(tag))
-                blocked = tidyup.domain(tag.contents[0])
+                blocked = tidyup.domain(tag.contents[0]) if tag.contents[0] != "" else None
                 logger.debug("blocked='%s'", blocked)
 
-                if blocked == "":
+                if blocked is None:
+                    logger.warning("blocked is empty - SKIPPED!")
+                    continue
+                elif blocked == "":
                     logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                     continue
                 elif not domain_helper.is_wanted(blocked):
@@ -264,11 +267,16 @@ def fetch_instances(domain: str, origin: str) -> list:
                     for tag in rows:
                         logger.debug("tag[]='%s'", type(tag))
                         text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
-                        peer = tidyup.domain(text)
-                        logger.debug("peer='%s'", peer)
+                        logger.debug("text='%s' - BEFORE!", text)
+
+                        peer = tidyup.domain(text) if text != "" else None
+                        logger.debug("peer='%s' - AFTER", peer)
 
-                        if peer == "":
-                            logger.debug("peer is empty - SKIPPED!")
+                        if peer is None:
+                            logger.warning("peer is empty - SKIPPED!")
+                            continue
+                        elif peer == "":
+                            logger.warning("peer is an empty string, text='%s' - SKIPPED!", text)
                             continue
                         elif not domain_helper.is_wanted(peer):
                             logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
@@ -367,8 +375,11 @@ def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
                 peer = tidyup.domain(row["domain"])
                 logger.debug("peer='%s' - AFTER!", peer)
 
-                if peer == "":
-                    logger.debug("peer is empty - SKIPPED!")
+                if peer is None:
+                    logger.warning("peer is empty - SKIPPED!")
+                    continue
+                elif peer == "":
+                    logger.warning("peer is an empty string, row[domain]='%s' - SKIPPED!", row["domain"])
                     continue
                 elif not domain_helper.is_wanted(peer):
                     logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
index cda3cc4786b328cc2e20307de4b9721da831dbd8..3b57892c8f307a1dbaf1527a67f8298e10ddb5af 100644 (file)
@@ -115,10 +115,27 @@ def fetch_blocks_from_about(domain: str) -> dict:
         if header_text in blocklist or header_text.lower() in blocklist:
             # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
             for line in header.find_all_next("table")[0].find_all("tr")[1:]:
+                domain = line.find("span").text
+                hash   = line.find("span")["title"][9:]
+                reason = line.find_all("td")[1].text
+
+                logger.debug("domain='%s',reason='%s' - BEFORE!", domain, reason)
+                domain = tidyup.domain(domain) if domain != "" else None
+                reason = tidyup.reason(reason) if reason != "" else None
+
+                logger.debug("domain='%s',reason='%s' - AFTER!", domain, reason)
+                if domain is None:
+                    logger.warning("domain is empty,line='%s' - SKIPPED!", line)
+                    continue
+                elif domain == "":
+                    logger.warning("domain is an empty string,line='%s' - SKIPPED!", line)
+                    continue
+
+                logger.debug("Appending domain='%s',hash='%s',reason='%s' to blocklist header_text='%s' ...", domain, hash, reason, blocklist)
                 blocklist[header_text].append({
-                    "domain": tidyup.domain(line.find("span").text),
-                    "hash"  : tidyup.domain(line.find("span")["title"][9:]),
-                    "reason": tidyup.reason(line.find_all("td")[1].text),
+                    "domain": domain,
+                    "hash"  : hash,
+                    "reason": reason,
                 })
         else:
             logger.warning("header_text='%s' not found in blocklist()=%d", header_text, len(blocklist))
@@ -157,6 +174,7 @@ def fetch_blocks(domain: str) -> list:
                 logger.debug("block[]='%s' is of type 'dict' - SKIPPED!", type(block))
                 continue
             elif "domain" not in block:
+                logger.debug("block='%s'", block)
                 logger.warning("block()=%d does not contain element 'domain' - SKIPPED!", len(block))
                 continue
             elif not domain_helper.is_wanted(block["domain"]):
index ebd5b789c1b16f4ec9e892fbdb08a984b3b844ff..1a69c239a7d42d374f7da7968391c790b720a983 100644 (file)
@@ -108,7 +108,7 @@ def fetch_blocks(domain: str) -> list:
             }
         ).items():
             logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
-            block_level = tidyup.domain(block_level)
+            block_level = tidyup.domain(block_level) if block_level != "" else None
             logger.debug("block_level='%s' - AFTER!", block_level)
 
             if block_level == "":
@@ -124,11 +124,14 @@ def fetch_blocks(domain: str) -> list:
             if len(blocklist) > 0:
                 for blocked in blocklist:
                     logger.debug("blocked='%s' - BEFORE!", blocked)
-                    blocked = tidyup.domain(blocked)
+                    blocked = tidyup.domain(blocked) if blocked != "" else None
                     logger.debug("blocked='%s' - AFTER!", blocked)
 
-                    if blocked == "":
-                        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", domain, block_level)
+                    if blocked is None:
+                        logger.warning("blocked is empty - SKIPPED!")
+                        continue
+                    elif blocked == "":
+                        logger.warning("blocked is an empty string after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", domain, block_level)
                         continue
 
                     logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
@@ -154,7 +157,7 @@ def fetch_blocks(domain: str) -> list:
 
         for blocked in data["quarantined_instances"]:
             logger.debug("blocked='%s' - BEFORE!", blocked)
-            blocked = tidyup.domain(blocked)
+            blocked = tidyup.domain(blocked) if blocked != "" else None
             logger.debug("blocked='%s' - AFTER!", blocked)
 
             if blocked == "":
@@ -189,7 +192,7 @@ def fetch_blocks(domain: str) -> list:
             }
         ).items():
             logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
-            block_level = tidyup.domain(block_level)
+            block_level = tidyup.domain(block_level) if block_level != "" else None
             logger.debug("block_level='%s' - AFTER!", block_level)
 
             if block_level == "":
@@ -204,7 +207,7 @@ def fetch_blocks(domain: str) -> list:
             logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
             for blocked, reason in info.items():
                 logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
-                blocked = tidyup.domain(blocked)
+                blocked = tidyup.domain(blocked) if blocked != "" else None
                 logger.debug("blocked='%s' - AFTER!", blocked)
 
                 if isinstance(reason, str):
@@ -239,7 +242,7 @@ def fetch_blocks(domain: str) -> list:
         for blocked in rows:
             logger.debug("blocked='%s' - BEFORE!", blocked)
             reason = tidyup.reason(rows[blocked]["reason"])
-            blocked = tidyup.domain(blocked)
+            blocked = tidyup.domain(blocked) if blocked != "" else None
             logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
 
             if blocked not in rows or "reason" not in rows[blocked]:
@@ -365,8 +368,12 @@ def fetch_blocks_from_about(domain: str) -> dict:
             logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
             for line in header.find_next("table").find_all("tr")[1:]:
                 logger.debug("line[]='%s'", type(line))
-                blocked = tidyup.domain(line.find_all("td")[0].text)
+                blocked = line.find_all("td")[0].text
+                logger.debug("blocked='%s'", blocked)
+
+                blocked = tidyup.domain(blocked) if blocked != "" else None
                 reason = tidyup.reason(line.find_all("td")[1].text)
+                logger.debig("blocked='%s',reason='%s' - AFTER!", blocked, reason)
 
                 if blocked is None or blocked == "":
                     logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level)