git.mxchange.org Git - fba.git/commitdiff
Continued:
authorRoland Häder <roland@mxchange.org>
Sun, 25 Jun 2023 06:47:13 +0000 (08:47 +0200)
committerRoland Häder <roland@mxchange.org>
Sun, 25 Jun 2023 07:00:16 +0000 (09:00 +0200)
- let's be nice and only fetch instances/blocks from not recently crawled
  (i.e. out-dated) records so we keep bandwidth usage low on these servers

fba/commands.py

index f19d51b68415952901ff9ab389bb24f527544f6e..f443520a6e1cc54e568a2febb4c459081c37d0ab 100644 (file)
@@ -108,17 +108,20 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int:
         for row in rows:
             logger.debug("row[]='%s'", type(row))
             if "domain" not in row:
-                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!")
+                logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
                 continue
-            elif not utils.is_domain_wanted(row['domain']):
-                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row['domain'])
+            elif not utils.is_domain_wanted(row["domain"]):
+                logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
                 continue
-            elif instances.is_registered(row['domain']):
-                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row['domain'])
+            elif instances.is_registered(row["domain"]):
+                logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
+                continue
+            elif instances.is_recent(row["domain"]):
+                logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
                 continue
 
-            logger.debug("Fetching instances from row[domain]='%s' ...", row['domain'])
-            federation.fetch_instances(row['domain'], None, None, inspect.currentframe().f_code.co_name)
+            logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
+            federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
 
     except network.exceptions as exception:
         logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
@@ -162,10 +165,13 @@ def fetch_bkali(args: argparse.Namespace) -> int:
                 logger.debug("entry[domain]='%s' is not wanted - SKIPPED!")
                 continue
             elif instances.is_registered(entry["domain"]):
-                logger.debug("domain='%s' is already registered - SKIPPED!", entry['domain'])
+                logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
+                continue
+            elif instances.is_recent(entry["domain"]):
+                logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
                 continue
 
-            logger.debug("Adding domain='%s' ...", entry['domain'])
+            logger.debug("Adding domain='%s' ...", entry["domain"])
             domains.append(entry["domain"])
 
     except network.exceptions as exception:
@@ -551,7 +557,10 @@ def fetch_cs(args: argparse.Namespace):
 
             for row in domains[block_level]:
                 logger.debug("row[%s]='%s'", type(row), row)
-                if not instances.is_registered(row["domain"]):
+                if instances.is_recent(row["domain"], "last_blocked"):
+                    logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
+                    continue
+                elif not instances.is_registered(row["domain"]):
                     try:
                         logger.info("Fetching instances from domain='%s' ...", row["domain"])
                         federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
@@ -586,8 +595,8 @@ def fetch_fba_rss(args: argparse.Namespace) -> int:
             logger.debug("item='%s'", item)
             domain = item.link.split("=")[1]
 
-            if blacklist.is_blacklisted(domain):
-                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
+            if not utils.is_domain_wanted(domain):
+                logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                 continue
             elif domain in domains:
                 logger.debug("domain='%s' is already added - SKIPPED!", domain)
@@ -595,6 +604,9 @@ def fetch_fba_rss(args: argparse.Namespace) -> int:
             elif instances.is_registered(domain):
                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                 continue
+            elif instances.is_recent(domain):
+                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+                continue
 
             logger.debug("Adding domain='%s'", domain)
             domains.append(domain)
@@ -649,11 +661,14 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
                     elif instances.is_registered(domain):
                         logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                         continue
+                    elif instances.is_recent(domain):
+                        logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+                        continue
 
                     logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
                     domains.append(domain)
 
-    logger.debug("domains(%d)='%s", len(domains), domains)
+    logger.debug("domains()='%d", len(domains))
     if len(domains) > 0:
         locking.acquire()
 
@@ -696,7 +711,7 @@ def fetch_instances(args: argparse.Namespace) -> int:
     for row in rows:
         logger.debug("domain='%s'", row[0])
         if not utils.is_domain_wanted(row[0]):
-            logger.debug("Domain is not wanted: row[0]='%s'", row[0])
+            logger.debug("Domain row[0]='%s' is not wanted - SKIPPED!", row[0])
             continue
 
         try:
@@ -768,6 +783,9 @@ def fetch_oliphant(args: argparse.Namespace) -> int:
         elif args.domain in domains:
             logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
             continue
+        elif instances.is_recent(block["blocker"]):
+            logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
+            continue
 
         # Fetch this URL
         logger.info("Fetching csv_url='%s' for blocker='%s' ...", block['csv_url'], block["blocker"])
@@ -794,6 +812,9 @@ def fetch_oliphant(args: argparse.Namespace) -> int:
                 if not utils.is_domain_wanted(domain):
                     logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                     continue
+                elif instances.is_recent(domain):
+                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+                    continue
 
                 logger.debug("Marking domain='%s' as handled", domain)
                 domains.append(domain)
@@ -835,6 +856,9 @@ def fetch_txt(args: argparse.Namespace) -> int:
                 elif not utils.is_domain_wanted(domain):
                     logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
                     continue
+                elif instances.is_recent(domain):
+                    logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+                    continue
 
                 logger.debug("domain='%s',row[blocker]='%s'", domain, row["blocker"])
                 processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
@@ -876,6 +900,9 @@ def fetch_fedipact(args: argparse.Namespace) -> int:
             elif instances.is_registered(domain):
                 logger.debug("domain='%s' is already registered - SKIPPED!", domain)
                 continue
+            elif instances.is_recent(domain):
+                logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+                continue
 
             logger.info("Fetching domain='%s' ...", domain)
             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)