git.mxchange.org Git - hub.git/blobdiff - application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
Register the task handler after it has been created as some tasks may need it
[hub.git] / application / hub / main / filter / task / crawler / class_CrawlerTaskHandlerInitializerFilter.php
index 799f9b60a9eecdb51fbf610c5de824642e573af4..1a5ee9eb7cc8c25f42c85417088c72779b612915 100644 (file)
@@ -52,7 +52,7 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
         * @param       $responseInstance       An instance of a class with an Responseable interface
         * @return      void
         * @throws      FilterChainException    If the filter chain needs to be interrupted
-        * @todo        5% done
+        * @todo        10% done
         */
        public function execute (Requestable $requestInstance, Responseable $responseInstance) {
                // Get crawler instance
@@ -61,6 +61,9 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                // Get a new task handler instance
                $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
 
+               // Put the task handler in registry
+               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
                /*
                 * Register all tasks:
                 *
@@ -69,16 +72,50 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class');
                $handlerInstance->registerTask('crawler_node_communicator', $taskInstance);
 
-               // 2) Local URL fetcher (fetches URLs locally and adds them to the analyzer's input stack)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_fetcher_task_class');
-               $handlerInstance->registerTask('crawler_local_url_fetcher', $taskInstance);
+               // 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_crawler_task_class');
+               $handlerInstance->registerTask('crawler_local_url_crawler', $taskInstance);
 
-               // 3) Remote URL fetcher (let fetch URLs by other crawler nodes and also adds them to the stack)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_fetcher_task_class');
-               $handlerInstance->registerTask('crawler_remote_url_fetcher', $taskInstance);
+               // 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_crawler_task_class');
+               $handlerInstance->registerTask('crawler_remote_url_crawler', $taskInstance);
 
-               // Put the task handler in registry
-               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+               // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
+               $handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance);
+
+               // 5) MIME sniffer
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_mime_sniffer_task_class');
+               $handlerInstance->registerTask('crawler_mime_sniffer', $taskInstance);
+
+               // 6) Document parser (converts document to meta format)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_document_parser_task_class');
+               $handlerInstance->registerTask('crawler_document_parser', $taskInstance);
+
+               // 7) Document structure analyzer
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_structure_analyzer_task_class');
+               $handlerInstance->registerTask('crawler_structure_analyzer', $taskInstance);
+
+               // 8) Snippet extractor
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_snippet_extractor_task_class');
+               $handlerInstance->registerTask('crawler_snippet_extractor', $taskInstance);
+
+               // 9) Node ping
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
+               $handlerInstance->registerTask('crawler_ping', $taskInstance);
+
+               // 10) URL sources
+               foreach (explode(':', $this->getConfigInstance()->getConfigEntry('crawler_url_stacks')) as $stack) {
+                       // Init task instance
+                       $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_' . $stack . '_task_class');
+
+                       // And register it
+                       $handlerInstance->registerTask('crawler_url_source_' . $stack, $taskInstance);
+               } // END - foreach
+
+               // 11) Uploaded list scanner (checks for wanted files)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class');
+               $handlerInstance->registerTask('crawler_uploaded_list_scanner', $taskInstance);
        }
 }