help="Fetches instances (aka. \"domains\") from an initial instance. You may want to re-run this command several times (at least 3 with big instances) to have a decent amount of valid instances.",
)
parser.set_defaults(command=commands.fetch_instances)
- parser.add_argument("--domain", required=True, help="Instance name (aka. domain) to fetch further instances from. Start with a large instance, e.g. mastodon.social .")
+ parser.add_argument("--domain", help="Instance name (aka. domain) to fetch further instances from. Start with a large instance, e.g. mastodon.social .")
+ parser.add_argument("--software", help="Name of software, e.g. 'lemmy'")
parser.add_argument("--single", action="store_true", help="Only fetch given instance.")
### Fetch blocks from static text file(s) ###
rows = list()
# Is domain or software set?
- if args.domain != "":
+ if args.domain not in [None, ""]:
logger.debug("args.domain='%s' - checking ...", args.domain)
if not validators.domain(args.domain):
logger.warning("args.domain='%s' is not valid.", args.domain)
# Fetch record
database.cursor.execute("SELECT domain, origin, software FROM instances WHERE domain = ? LIMIT 1", [domain])
rows = database.cursor.fetchall()
+ elif args.software not in [None, ""]:
+ logger.debug("args.software='%s' - BEFORE!", args.software)
+ software = software_helper.alias(args.software)
+ logger.debug("software='%s' - AFTER!", software)
+
+ # Fetch records
+ database.cursor.execute("SELECT domain, origin, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [software])
+ rows = database.cursor.fetchall()
logger.info("Checking %d entries ...", len(rows))
for row in rows:
elif instances.is_registered(row["domain"]) and software_helper.is_relay(row["software"]):
logger.warning("row[domain]='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"])
continue
+ elif instances.is_recent(row["domain"]):
+ logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
+ continue
# Initial fetch
try: