git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Sun, 2 Jul 2023 07:09:30 +0000 (09:09 +0200)
committer Roland Häder <roland@mxchange.org>
Sun, 2 Jul 2023 07:11:35 +0000 (09:11 +0200)
- added command fetch_fedilist()
- please don't overuse these commands: fetching instances is limited to those
  not recently fetched, but fetching from static websites like
  fediverse.observer is NOT limited!
- flush pending data here, too

fba/boot.py
fba/commands.py
fba/http/federation.py

index dc53f92b01ac68ae919c91aa3d6a6f16be1ae986..07b4a6093a0ce79125a3681586fe45ded782878f 100644 (file)
@@ -172,6 +172,14 @@ def init_parser():
     )
     parser.set_defaults(command=commands.check_nodeinfo)
 
+    ### Fetch CSV from fedilist.com ###
+    parser = subparser_command.add_parser(
+        "fetch_fedilist",
+        help="Fetches CSV from fedilist.com",
+    )
+    parser.set_defaults(command=commands.fetch_fedilist)
+    parser.add_argument("--software", help="Name of software, e.g. 'lemmy'")
+
     logger.debug("EXIT!")
 
 def run_command():
index fc5de72c60f58a785b0cbce34fd261786db2605a..7e363115077cfa0a142d8ee898a0e63ba3a4d6e0 100644 (file)
@@ -1279,3 +1279,47 @@ def recheck_obfuscation(args: argparse.Namespace) -> int:
 
     logger.debug("Success! - EXIT!")
     return 0
+
+def fetch_fedilist(args: argparse.Namespace) -> int:
+    """
+    Fetches the instance CSV from fedilist.com and crawls every listed host.
+
+    The optional args.software value narrows the CSV to one software (e.g.
+    'lemmy'). Each row's 'hostname' column is passed through tidyup.domain();
+    rows without a hostname, with an empty result, with an unwanted domain or
+    with a recently crawled domain are skipped. All remaining domains are
+    handed to federation.fetch_instances().
+
+    Returns 0 on success and 1 when the CSV could not be fetched.
+    """
+    # Function-scope import: only this command needs URL quoting
+    from urllib.parse import quote_plus
+
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    url = "http://demo.fedilist.com/instance/csv?onion=not"
+    if args.software is not None and args.software != "":
+        logger.debug("args.software='%s'", args.software)
+        # The value comes straight from the command line, so it must be
+        # escaped before it becomes part of the query string
+        url = f"http://demo.fedilist.com/instance/csv?software={quote_plus(args.software)}&onion=not"
+
+    # NOTE(review): no matching release in this function - presumably the
+    # lock is released by the caller on shutdown; confirm
+    locking.acquire()
+
+    logger.info("Fetching url='%s' from fedilist.com ...", url)
+    response = reqto.get(
+        url,
+        headers=network.web_headers,
+        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
+        allow_redirects=False
+    )
+
+    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+    if not response.ok:
+        # An error page is not CSV, parsing it would only yield garbage rows
+        logger.warning("Failed fetching url='%s': response.status_code=%d - EXIT!", url, response.status_code)
+        return 1
+
+    reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
+
+    logger.debug("reader[]='%s'", type(reader))
+    for row in reader:
+        logger.debug("row[]='%s'", type(row))
+        # Covers both a missing 'hostname' column and a short row, where
+        # csv.DictReader fills absent cells with None
+        if row.get("hostname") is None:
+            logger.warning("row has no element 'hostname' - SKIPPED!")
+            continue
+
+        domain = tidyup.domain(row["hostname"])
+        logger.debug("domain='%s' - AFTER!", domain)
+
+        if domain == "":
+            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
+            continue
+        elif not utils.is_domain_wanted(domain):
+            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
+            continue
+        elif instances.is_recent(domain):
+            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+            continue
+
+        logger.info("Fetching instances from domain='%s' ...", domain)
+        # The last argument records this function's own name as the command
+        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
+
+    logger.debug("Success! - EXIT!")
+    return 0
index 4e3881505ee5dd270c6432bf60595c73836ea08c..09397221bc7bb3a4cd06d94262acc5b78a20556e 100644 (file)
@@ -63,6 +63,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path:
     elif not isinstance(software, str):
         raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
 
+    logger.debug("Checking if domain='%s' is registered ...", domain)
     if not instances.is_registered(domain):
         logger.debug("Adding new domain='%s',origin='%s',command='%s',path='%s',software='%s'", domain, origin, command, path, software)
         instances.add(domain, origin, command, path, software)