git.mxchange.org Git - hub.git/blobdiff - application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
Now all URL sources (stacks) are initialized in a loop. This config entry will
[hub.git] / application / hub / main / filter / task / crawler / class_CrawlerTaskHandlerInitializerFilter.php
index 46a84ffef9ef7c10ae24524dbb7ee479332fdb98..36fe3946c9a95b5cf68ac8e97105d6246704cfc1 100644 (file)
@@ -69,13 +69,13 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class');
                $handlerInstance->registerTask('crawler_node_communicator', $taskInstance);
 
-               // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class');
-               $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance);
+               // 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_crawler_task_class');
+               $handlerInstance->registerTask('crawler_local_url_crawler', $taskInstance);
 
-               // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class');
-               $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance);
+               // 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_crawler_task_class');
+               $handlerInstance->registerTask('crawler_remote_url_crawler', $taskInstance);
 
                // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
@@ -101,23 +101,16 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
                $handlerInstance->registerTask('crawler_ping', $taskInstance);
 
-               // 10) URL source: local start
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_local_start_task_class');
-               $handlerInstance->registerTask('crawler_url_source_local_start', $taskInstance);
+               // 10) URL sources
+               foreach (explode(':', $this->getConfigInstance()->getConfigEntry('crawler_url_stacks')) as $stack) {
+                       // Init task instance
+                       $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_' . $stack . '_task_class');
 
-               // 11) URL source: uploaded list
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_uploaded_list_task_class');
-               $handlerInstance->registerTask('crawler_url_source_uploaded_list', $taskInstance);
+                       // And register it
+                       $handlerInstance->registerTask('crawler_url_source_' . $stack, $taskInstance);
+               } // END - foreach
 
-               // 12) URL source: RSS feed
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_rss_start_task_class');
-               $handlerInstance->registerTask('crawler_url_source_rss_start', $taskInstance);
-
-               // 13) URL source: found RSS/ATOM feed
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class');
-               $handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance);
-
-               // 14) Uploaded list scanner (checks for wanted files)
+               // 11) Uploaded list scanner (checks for wanted files)
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class');
                $handlerInstance->registerTask('crawler_uploaded_list_scanner', $taskInstance);