$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class');
$handlerInstance->registerTask('crawler_node_communicator', $taskInstance);
- // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack)
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class');
- $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance);
+ // 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_crawler_task_class');
+ $handlerInstance->registerTask('crawler_local_url_crawler', $taskInstance);
- // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index)
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class');
- $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance);
+ // 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_crawler_task_class');
+ $handlerInstance->registerTask('crawler_remote_url_crawler', $taskInstance);
// 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
$handlerInstance->registerTask('crawler_ping', $taskInstance);
- // 10) URL source: local start
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_local_start_task_class');
- $handlerInstance->registerTask('crawler_url_source_local_start', $taskInstance);
+ // 10) URL sources
+ // The config entry 'crawler_url_stacks' holds the stack names as a
+ // colon-separated list. For each name, the task class is looked up under
+ // 'crawler_url_source_<name>_task_class' and the resulting instance is
+ // registered as task 'crawler_url_source_<name>'.
+ foreach (explode(':', $this->getConfigInstance()->getConfigEntry('crawler_url_stacks')) as $stack) {
+ // Init task instance
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_' . $stack . '_task_class');
- // 11) URL source: uploaded list
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_uploaded_list_task_class');
- $handlerInstance->registerTask('crawler_url_source_uploaded_list', $taskInstance);
+ // And register it
+ $handlerInstance->registerTask('crawler_url_source_' . $stack, $taskInstance);
+ } // END - foreach
- // 12) URL source: RSS feed
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_rss_start_task_class');
- $handlerInstance->registerTask('crawler_url_source_rss_start', $taskInstance);
-
- // 13) URL source: found RSS/ATOM feed
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class');
- $handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance);
-
- // 14) Uploaded list scanner (checks for wanted files)
+ // 11) Uploaded list scanner (checks for wanted files)
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class');
$handlerInstance->registerTask('crawler_uploaded_list_scanner', $taskInstance);