From: Roland Häder Date: Thu, 8 Feb 2024 17:00:57 +0000 (+0100) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=618a7b0293a20f9f19608523450ae6178e3c2322;p=fba.git Continued: - make --domain parameter for command fetch_instances optional so --software can be handled, too - skip recently crawled domains in same loop - args.domain and args.software can both be of type None, too --- diff --git a/fba/boot.py b/fba/boot.py index ea31d7e..0fb55ec 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -144,7 +144,8 @@ def init_parser(): help="Fetches instances (aka. \"domains\") from an initial instance. You may want to re-run this command several times (at least 3 with big instances) to have a decent amount of valid instances.", ) parser.set_defaults(command=commands.fetch_instances) - parser.add_argument("--domain", required=True, help="Instance name (aka. domain) to fetch further instances from. Start with a large instance, e.g. mastodon.social .") + parser.add_argument("--domain", help="Instance name (aka. domain) to fetch further instances from. Start with a large instance, e.g. mastodon.social .") + parser.add_argument("--software", help="Name of software, e.g. 'lemmy'") parser.add_argument("--single", action="store_true", help="Only fetch given instance.") ### Fetch blocks from static text file(s) ### diff --git a/fba/commands.py b/fba/commands.py index 561815c..6999e2a 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -962,7 +962,7 @@ def fetch_instances(args: argparse.Namespace) -> int: rows = list() # Is domain or software set? - if args.domain != "": + if args.domain not in [None, ""]: logger.debug("args.domain='%s' - checking ...", args.domain) if not validators.domain(args.domain): logger.warning("args.domain='%s' is not valid.", args.domain) @@ -978,6 +978,14 @@ def fetch_instances(args: argparse.Namespace) -> int: # Fetch record database.cursor.execute("SELECT domain, origin, software FROM instances WHERE domain = ? 
LIMIT 1", [domain]) rows = database.cursor.fetchall() + elif args.software not in [None, ""]: + logger.debug("args.software='%s' - BEFORE!", args.software) + software = software_helper.alias(args.software) + logger.debug("software='%s' - AFTER!", software) + + # Fetch records + database.cursor.execute("SELECT domain, origin, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [software]) + rows = database.cursor.fetchall() logger.info("Checking %d entries ...", len(rows)) for row in rows: @@ -988,6 +996,9 @@ def fetch_instances(args: argparse.Namespace) -> int: elif instances.is_registered(row["domain"]) and software_helper.is_relay(row["software"]): logger.warning("row[domain]='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"]) continue + elif instances.is_recent(row["domain"]): + logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"]) + continue # Initial fetch try: