for row in rows:
logger.debug("row[]='%s'", type(row))
if "domain" not in row:
- logger.warning("row='%s' does not contain element 'domain' - SKIPPED!")
+ logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
continue
- elif not utils.is_domain_wanted(row['domain']):
- logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row['domain'])
+ elif not utils.is_domain_wanted(row["domain"]):
+ logger.debug("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
continue
- elif instances.is_registered(row['domain']):
- logger.debug("row[domain]='%s' is already registered - SKIPPED!", row['domain'])
+ elif instances.is_registered(row["domain"]):
+ logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
+ continue
+ elif instances.is_recent(row["domain"]):
+ logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
continue
- logger.debug("Fetching instances from row[domain]='%s' ...", row['domain'])
- federation.fetch_instances(row['domain'], None, None, inspect.currentframe().f_code.co_name)
+ logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
+ federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
logger.debug("entry[domain]='%s' is not wanted - SKIPPED!")
continue
elif instances.is_registered(entry["domain"]):
- logger.debug("domain='%s' is already registered - SKIPPED!", entry['domain'])
+ logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
+ continue
+ elif instances.is_recent(entry["domain"]):
+ logger.debug("entry[domain]='%s' has been recently crawled - SKIPPED!", entry["domain"])
continue
- logger.debug("Adding domain='%s' ...", entry['domain'])
+ logger.debug("Adding domain='%s' ...", entry["domain"])
domains.append(entry["domain"])
except network.exceptions as exception:
for row in domains[block_level]:
logger.debug("row[%s]='%s'", type(row), row)
- if not instances.is_registered(row["domain"]):
+ if instances.is_recent(row["domain"], "last_blocked"):
+ logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
+ continue
+ elif not instances.is_registered(row["domain"]):
try:
logger.info("Fetching instances from domain='%s' ...", row["domain"])
federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
logger.debug("item='%s'", item)
domain = item.link.split("=")[1]
- if blacklist.is_blacklisted(domain):
- logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
+ if not utils.is_domain_wanted(domain):
+ logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif domain in domains:
logger.debug("domain='%s' is already added - SKIPPED!", domain)
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
continue
+ elif instances.is_recent(domain):
+ logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+ continue
logger.debug("Adding domain='%s'", domain)
domains.append(domain)
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
continue
+ elif instances.is_recent(domain):
+ logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+ continue
logger.debug("Adding domain='%s',domains()=%d", domain, len(domains))
domains.append(domain)
- logger.debug("domains(%d)='%s", len(domains), domains)
+ logger.debug("domains()=%d", len(domains))
if len(domains) > 0:
locking.acquire()
for row in rows:
logger.debug("domain='%s'", row[0])
if not utils.is_domain_wanted(row[0]):
- logger.debug("Domain is not wanted: row[0]='%s'", row[0])
+ logger.debug("Domain row[0]='%s' is not wanted - SKIPPED!", row[0])
continue
try:
elif args.domain in domains:
logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
continue
+ elif instances.is_recent(block["blocker"]):
+ logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
+ continue
# Fetch this URL
logger.info("Fetching csv_url='%s' for blocker='%s' ...", block['csv_url'], block["blocker"])
if not utils.is_domain_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
+ elif instances.is_recent(domain):
+ logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+ continue
logger.debug("Marking domain='%s' as handled", domain)
domains.append(domain)
elif not utils.is_domain_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
+ elif instances.is_recent(domain):
+ logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+ continue
logger.debug("domain='%s',row[blocker]='%s'", domain, row["blocker"])
processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
continue
+ elif instances.is_recent(domain):
+ logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+ continue
logger.info("Fetching domain='%s' ...", domain)
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)