* @version		0.0.0
 * @copyright	Copyright (c) 2014 Crawler Developer Team
 * @license		GNU GPL 3.0 or any newer version
 * @link		http://www.shipsimu.org
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements Filterable {
	/**
	 * Protected constructor, instances are obtained through
	 * createCrawlerTaskHandlerInitializerFilter().
	 *
	 * @return	void
	 */
	protected function __construct () {
		// Hand this class' name over to the parent constructor
		parent::__construct(__CLASS__);
	}

	/**
	 * Factory method: creates an instance of this filter class
	 *
	 * @return	$filterInstance		An instance of this filter class
	 */
	public static final function createCrawlerTaskHandlerInitializerFilter () {
		// Create and return the new instance in one step
		return new self();
	}

	/**
	 * Executes the filter with given request and response objects: creates
	 * the configured task handler, registers all crawler tasks with it (in
	 * the order listed below) and finally stores the handler in the registry
	 * under the key 'task_handler'.
	 *
	 * Neither the request nor the response instance is used by this filter.
	 *
	 * @param	$requestInstance	An instance of a class with an Requestable interface
	 * @param	$responseInstance	An instance of a class with an Responseable interface
	 * @return	void
	 * @throws	FilterChainException	If the filter chain needs to be interrupted
	 * @todo	10% done
	 */
	public function execute (Requestable $requestInstance, Responseable $responseInstance) {
		// Crawler instance is not needed yet:
		//$crawlerInstance = Registry::getRegistry()->getInstance('crawler');

		// Get a new task handler instance
		$handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');

		/*
		 * All tasks to register: task name => configuration name handed to
		 * ObjectFactory::createObjectByConfiguredName(). The array order is
		 * the registration order.
		 */
		$taskConfigNames = array(
			// 1) "Communicator" for crawler->node communication
			'crawler_node_communicator'        => 'crawler_node_communicator_task_class',
			// 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack)
			'crawler_local_url_getter'         => 'crawler_local_url_getter_task_class',
			// 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index)
			'crawler_remote_url_getter'        => 'crawler_remote_url_getter_task_class',
			// 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
			'crawler_remote_job_publisher'     => 'crawler_remote_job_publisher_task_class',
			// 5) MIME sniffer
			'crawler_mime_sniffer'             => 'crawler_mime_sniffer_task_class',
			// 6) Document parser (converts document to meta format)
			'crawler_document_parser'          => 'crawler_document_parser_task_class',
			// 7) Document structure analyzer
			'crawler_structure_analyzer'       => 'crawler_structure_analyzer_task_class',
			// 8) Snippet extractor
			'crawler_snippet_extractor'        => 'crawler_snippet_extractor_task_class',
			// 9) Node ping
			'crawler_ping'                     => 'crawler_ping_task_class',
			// 10) URL source: local start
			'crawler_url_source_local_start'   => 'crawler_url_source_local_start_task_class',
			// 11) URL source: uploaded list
			'crawler_url_source_uploaded_list' => 'crawler_url_source_uploaded_list_task_class',
			// 12) URL source: RSS feed
			'crawler_url_source_rss_start'     => 'crawler_url_source_rss_start_task_class',
			// 13) URL source: found RSS/ATOM feed
			'crawler_url_source_found_rss'     => 'crawler_url_source_found_rss_task_class',
			// 14) Uploaded list scanner (checks for wanted files)
			'crawler_uploaded_list_scanner'    => 'crawler_uploaded_list_scanner_task_class',
		);

		// Create each task and register it under its name, one after another
		foreach ($taskConfigNames as $taskName => $configName) {
			$handlerInstance->registerTask($taskName, ObjectFactory::createObjectByConfiguredName($configName));
		}

		// Put the task handler in registry
		Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
	}
}

// [EOF]
?>