* @param $responseInstance An instance of a class with an Responseable interface
* @return void
* @throws FilterChainException If the filter chain needs to be interrupted
- * @todo 5% done
+ * @todo 10% done
*/
public function execute (Requestable $requestInstance, Responseable $responseInstance) {
// Get crawler instance
// Get a new task handler instance
$handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
+ // Put the task handler in registry
+ Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
/*
* Register all tasks:
*
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class');
$handlerInstance->registerTask('crawler_node_communicator', $taskInstance);
- // 2) Local URL fetcher (fetches URLs locally and adds them to the analyzer's input stack)
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_fetcher_task_class');
- $handlerInstance->registerTask('crawler_local_url_fetcher', $taskInstance);
+ // 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_crawler_task_class');
+ $handlerInstance->registerTask('crawler_local_url_crawler', $taskInstance);
- // 3) Remote URL fetcher (let fetch URLs by other crawler nodes and also adds them to the stack)
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_fetcher_task_class');
- $handlerInstance->registerTask('crawler_remote_url_fetcher', $taskInstance);
+ // 3) Remote URL crawler (gets URLs locally for other nodes and also includes the crawled URL in the local index)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_crawler_task_class');
+ $handlerInstance->registerTask('crawler_remote_url_crawler', $taskInstance);
- // Put the task handler in registry
- Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+ // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
+ $handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance);
+
+ // 5) MIME sniffer
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_mime_sniffer_task_class');
+ $handlerInstance->registerTask('crawler_mime_sniffer', $taskInstance);
+
+ // 6) Document parser (converts document to meta format)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_document_parser_task_class');
+ $handlerInstance->registerTask('crawler_document_parser', $taskInstance);
+
+ // 7) Document structure analyzer
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_structure_analyzer_task_class');
+ $handlerInstance->registerTask('crawler_structure_analyzer', $taskInstance);
+
+ // 8) Snippet extractor
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_snippet_extractor_task_class');
+ $handlerInstance->registerTask('crawler_snippet_extractor', $taskInstance);
+
+ // 9) Node ping
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
+ $handlerInstance->registerTask('crawler_ping', $taskInstance);
+
+ // 10) URL sources
+ foreach (explode(':', $this->getConfigInstance()->getConfigEntry('crawler_url_stacks')) as $stack) {
+ // Init task instance
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_' . $stack . '_task_class');
+
+ // And register it
+ $handlerInstance->registerTask('crawler_url_source_' . $stack, $taskInstance);
+ } // END - foreach
+
+ // 11) Uploaded list scanner (checks for wanted files)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class');
+ $handlerInstance->registerTask('crawler_uploaded_list_scanner', $taskInstance);
}
}