git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Wed, 21 Jun 2023 13:36:10 +0000 (15:36 +0200)
committer Roland Häder <roland@mxchange.org>
Wed, 21 Jun 2023 13:40:44 +0000 (15:40 +0200)
- added command fetch_fedipact(), which may gather some instances (ones
  that are happy to block)
- the warning for missing keys was too noisy; downgraded it to debug

fba/boot.py
fba/commands.py
fba/http/federation.py

diff --git a/fba/boot.py b/fba/boot.py
index 0e083b81cdd6c8fa01cc9846a3075348a94ca063..aed135a814a310663faca5aaae803e6106ad1e30 100644
@@ -122,6 +122,13 @@ def init_parser():
     )
     parser.set_defaults(command=commands.fetch_observer)
 
+    ### Fetch instances from fedipact.online ###
+    parser = subparser_command.add_parser(
+        "fetch_fedipact",
+        help="Fetches instances from fedipact.online.",
+    )
+    parser.set_defaults(command=commands.fetch_fedipact)
+
     logger.debug("init_parser(): EXIT!")
 
 def run_command():
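For context, the new subcommand plugs into argparse's set_defaults() dispatch: each subparser stores its handler in args.command, and run_command() only has to invoke it. A minimal, self-contained sketch of that pattern (the prog name and the stub handler are illustrative, not fba's actual wiring):

    import argparse

    def fetch_fedipact(args: argparse.Namespace):
        # stand-in for commands.fetch_fedipact()
        print("would fetch instances from fedipact.online")

    parser = argparse.ArgumentParser(prog="fba")
    subparser_command = parser.add_subparsers(dest="command_name", required=True)

    cmd = subparser_command.add_parser(
        "fetch_fedipact",
        help="Fetches instances from fedipact.online.",
    )
    cmd.set_defaults(command=fetch_fedipact)

    # run_command() boils down to this dispatch:
    args = parser.parse_args(["fetch_fedipact"])
    args.command(args)

Running the sketch prints the stub's message, just as args.command(args) would call commands.fetch_fedipact() in fba.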
diff --git a/fba/commands.py b/fba/commands.py
index a55e8a35a31e9d98290579f3c06dd6a53e99ac8b..ab82b24a278a39efa58eb61027bd23c43b18025e 100644
@@ -671,7 +671,7 @@ def fetch_oliphant(args: argparse.Namespace):
         logger.info(f"Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
         response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
-        logger.debug("response[]='%s'", type(response))
+        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
         if response.ok and response.content != "":
             logger.debug(f"Fetched {len(response.content)} Bytes, parsing CSV ...")
             reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
@@ -724,8 +724,8 @@ def fetch_txt(args: argparse.Namespace):
         logger.debug("Fetching url='%s' ...", url)
         response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
-        logger.debug("response[]='%s'", type(response))
-        if response.ok and response.text != "":
+        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+        if response.ok and response.status_code < 300 and response.text != "":
             logger.debug(f"Returned {len(response.text.strip())} Bytes for processing")
             domains = response.text.split("\n")
 
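Both fetch routines now share the stricter guard on response.ok plus an explicit response.status_code < 300. Since requests marks a response as ok for any status below 400, the extra comparison also filters out 3xx answers. A standalone sketch of the guard, assuming fba.fetch_url() returns a requests-style Response (the URL and timeouts are placeholders):

    import requests

    # placeholder URL and (connect, read) timeouts
    response = requests.get(
        "https://example.com/blocklist.txt",
        timeout=(5.0, 10.0),
    )

    if response.ok and response.status_code < 300 and response.text != "":
        domains = response.text.split("\n")
        print(f"{len(domains)} line(s) to process")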
@@ -757,3 +757,47 @@ def fetch_txt(args: argparse.Namespace):
                     continue
 
     logger.debug("EXIT!")
+
+def fetch_fedipact(args: argparse.Namespace):
+    logger.debug("args[]='%s' - CALLED!", type(args))
+    locking.acquire()
+
+    response = fba.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+
+    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+    if response.ok and response.status_code < 300 and response.text != "":
+        logger.debug("Parsing %d Bytes ...", len(response.text))
+
+        doc = bs4.BeautifulSoup(response.text, "html.parser")
+        logger.debug("doc[]='%s'", type(doc))
+
+        rows = doc.findAll("li")
+        logger.info("Checking %d row(s) ...", len(rows))
+        for row in rows:
+            logger.debug("row[]='%s'", type(row))
+            domain = tidyup.domain(row.contents[0])
+
+            logger.debug("domain='%s'", domain)
+            if domain == "":
+                logger.debug("domain is empty - SKIPPED!")
+                continue
+            elif not validators.domain(domain):
+                logger.warning("domain='%s' is not a valid domain name - SKIPPED!", domain)
+                continue
+            elif domain.endswith(".arpa"):
+                logger.debug("domain='%s' is a domain for reversed IP addresses - SKIPPED!", domain)
+                continue
+            elif domain.endswith(".tld"):
+                logger.debug("domain='%s' is a fake domain - SKIPPED!", domain)
+                continue
+            elif blacklist.is_blacklisted(domain):
+                logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
+                continue
+            elif instances.is_registered(domain):
+                logger.debug("domain='%s' is already registered - SKIPPED!", domain)
+                continue
+
+            logger.info("Fetching domain='%s' ...", domain)
+            federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
+
+    logger.debug("EXIT!")
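The core of fetch_fedipact() is a BeautifulSoup pass over the page's <li> elements, with validation before registration. A reduced, runnable sketch using inline sample HTML; the str()/strip()/lower() cleanup is a crude stand-in for fba's tidyup.domain() helper:

    import bs4
    import validators

    html = "<ul><li>example.social</li><li>not a domain</li></ul>"
    doc = bs4.BeautifulSoup(html, "html.parser")

    for row in doc.findAll("li"):
        # crude stand-in for tidyup.domain()
        domain = str(row.contents[0]).strip().lower()
        if domain == "" or not validators.domain(domain):
            continue
        print(f"would register domain='{domain}'")

Only "example.social" survives the checks; everything else is skipped, mirroring the SKIPPED! branches above.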
diff --git a/fba/http/federation.py b/fba/http/federation.py
index a7b2db4621c87d2738053ed6158c1a06a2da7a15..a8291c7e01b22d4d347666545ca9d262282c9ec1 100644
@@ -645,7 +645,7 @@ def add_peers(rows: dict) -> list:
     for key in ["linked", "allowed", "blocked"]:
         logger.debug(f"Checking key='{key}'")
         if key not in rows or rows[key] is None:
-            logger.warning(f"Cannot find key='{key}' or it is NoneType - SKIPPED!")
+            logger.debug(f"Cannot find key='{key}' or it is NoneType - SKIPPED!")
             continue
 
         logger.debug(f"Adding {len(rows[key])} peer(s) to peers list ...")