git.mxchange.org Git - hub.git/blobdiff - application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
Renamed fetcher -> getter + added task for remote-crawl jobs.
[hub.git] / application / hub / main / filter / task / crawler / class_CrawlerTaskHandlerInitializerFilter.php
index 799f9b60a9eecdb51fbf610c5de824642e573af4..43a7be491f7b6c994a411a0216c65e3f296d5cfd 100644 (file)
@@ -52,7 +52,7 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
         * @param       $responseInstance       An instance of a class with a Responseable interface
         * @return      void
         * @throws      FilterChainException    If the filter chain needs to be interrupted
-        * @todo        5% done
+        * @todo        10% done
         */
        public function execute (Requestable $requestInstance, Responseable $responseInstance) {
                // Get crawler instance
@@ -69,13 +69,17 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class');
                $handlerInstance->registerTask('crawler_node_communicator', $taskInstance);
 
-               // 2) Local URL fetcher (fetches URLs locally and adds them to the analyzer's input stack)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_fetcher_task_class');
-               $handlerInstance->registerTask('crawler_local_url_fetcher', $taskInstance);
+               // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class');
+               $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance);
 
-               // 3) Remote URL fetcher (let fetch URLs by other crawler nodes and also adds them to the stack)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_fetcher_task_class');
-               $handlerInstance->registerTask('crawler_remote_url_fetcher', $taskInstance);
+               // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class');
+               $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance);
+
+               // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
+               $handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance);
 
                // Put the task handler in registry
                Registry::getRegistry()->addInstance('task_handler', $handlerInstance);