+++ /dev/null
-<?php
-/**
- * A TaskHandlerInitializer filter for crawler
- *
- * @author Roland Haeder <webmaster@shipsimu.org>
- * @version 0.0.0
- * @copyright Copyright (c) 2014 Crawler Developer Team
- * @license GNU GPL 3.0 or any newer version
- * @link http://www.shipsimu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements Filterable {
-	/**
-	 * Protected constructor; instances are obtained through
-	 * createCrawlerTaskHandlerInitializerFilter().
-	 *
-	 * @return	void
-	 */
-	protected function __construct () {
-		// Pass this class' name on to the parent constructor
-		parent::__construct(__CLASS__);
-	}
-
-	/**
-	 * Factory method for this filter class
-	 *
-	 * @return	$filterInstance		A freshly created instance of this filter class
-	 */
-	public static final function createCrawlerTaskHandlerInitializerFilter () {
-		// Create the instance and hand it straight back to the caller
-		return new self();
-	}
-
-	/**
-	 * Executes the filter with given request and response objects: a task
-	 * handler is created from configuration, stored in the registry under
-	 * the key 'task_handler' and all crawler tasks are registered with it.
-	 * Neither the request nor the response instance is used by this method.
-	 *
-	 * @param	$requestInstance	An instance of a class with an Requestable interface
-	 * @param	$responseInstance	An instance of a class with an Responseable interface
-	 * @return	void
-	 * @throws	FilterChainException	If the filter chain needs to be interrupted
-	 *									(not thrown directly by this method body)
-	 * @todo	10% done
-	 */
-	public function execute (Requestable $requestInstance, Responseable $responseInstance) {
-		// Create the task handler and make it available through the registry
-		$taskHandler = ObjectFactory::createObjectByConfiguredName('task_handler_class');
-		Registry::getRegistry()->addInstance('task_handler', $taskHandler);
-
-		/*
-		 * Tasks which are always registered, in registration order. The
-		 * configuration entry of each task is its name plus '_task_class'.
-		 */
-		$fixedTaskNames = array(
-			// 1) "Communicator" for crawler->node communication
-			'crawler_node_communicator',
-			// 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
-			'crawler_local_url_crawler',
-			// 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index)
-			'crawler_remote_url_crawler',
-			// 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
-			'crawler_remote_job_publisher',
-			// 5) MIME sniffer
-			'crawler_mime_sniffer',
-			// 6) Document parser (converts document to meta format)
-			'crawler_document_parser',
-			// 7) Document structure analyzer
-			'crawler_structure_analyzer',
-			// 8) Snippet extractor
-			'crawler_snippet_extractor',
-			// 9) Node ping
-			'crawler_ping',
-		);
-
-		foreach ($fixedTaskNames as $taskName) {
-			$handlerTask = ObjectFactory::createObjectByConfiguredName($taskName . '_task_class');
-			$taskHandler->registerTask($taskName, $handlerTask);
-		}
-
-		// 10) URL sources, one task for each entry of the colon-separated 'crawler_url_stacks' setting
-		$stackNames = explode(':', $this->getConfigInstance()->getConfigEntry('crawler_url_stacks'));
-		foreach ($stackNames as $stackName) {
-			$taskName = 'crawler_url_source_' . $stackName;
-			$handlerTask = ObjectFactory::createObjectByConfiguredName($taskName . '_task_class');
-			$taskHandler->registerTask($taskName, $handlerTask);
-		}
-
-		// 11) Uploaded list scanner (checks for wanted files), registered last
-		$handlerTask = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class');
-		$taskHandler->registerTask('crawler_uploaded_list_scanner', $handlerTask);
-	}
-}
-
-// [EOF]
-?>