3 * A TaskHandlerInitializer filter for crawler
5 * @author Roland Haeder <webmaster@shipsimu.org>
7 * @copyright Copyright (c) 2014 Crawler Developer Team
8 * @license GNU GPL 3.0 or any newer version
9 * @link http://www.shipsimu.org
11 * This program is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation, either version 3 of the License, or
14 * (at your option) any later version.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with this program. If not, see <http://www.gnu.org/licenses/>.
/**
 * Filter that creates a task handler, registers the crawler's tasks with it
 * and stores the handler in the registry under the key 'task_handler'.
 */
class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements Filterable {
	/**
	 * Protected constructor, instances are obtained through
	 * createCrawlerTaskHandlerInitializerFilter().
	 *
	 * @return	void
	 */
	protected function __construct () {
		// Hand the name of this class over to the parent constructor
		parent::__construct(__CLASS__);
	}

	/**
	 * Factory method creating an instance of this filter class
	 *
	 * @return	$filterInstance		An instance of this filter class
	 */
	public static final function createCrawlerTaskHandlerInitializerFilter () {
		// The constructor is protected, so instantiate from inside the class
		return new self();
	}

	/**
	 * Executes the filter with given request and response objects. Neither
	 * of the two objects is read here; the method only prepares the task
	 * handler and places it in the registry.
	 *
	 * @param	$requestInstance	An instance of a class with an Requestable interface
	 * @param	$responseInstance	An instance of a class with an Responseable interface
	 * @return	void
	 * @throws	FilterChainException	If the filter chain needs to be interrupted
	 */
	public function execute (Requestable $requestInstance, Responseable $responseInstance) {
		// Fresh task handler which receives all tasks below
		$taskHandler = ObjectFactory::createObjectByConfiguredName('task_handler_class');

		/*
		 * Fixed tasks in registration order:
		 * task name => configuration entry holding the task's class name
		 */
		$fixedTasks = array(
			// 1) "Communicator" for crawler->node communication
			'crawler_node_communicator'    => 'crawler_node_communicator_task_class',
			// 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
			'crawler_local_url_crawler'    => 'crawler_local_url_crawler_task_class',
			// 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index)
			'crawler_remote_url_crawler'   => 'crawler_remote_url_crawler_task_class',
			// 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
			'crawler_remote_job_publisher' => 'crawler_remote_job_publisher_task_class',
			// 5) MIME sniffer
			'crawler_mime_sniffer'         => 'crawler_mime_sniffer_task_class',
			// 6) Document parser (converts document to meta format)
			'crawler_document_parser'      => 'crawler_document_parser_task_class',
			// 7) Document structure analyzer
			'crawler_structure_analyzer'   => 'crawler_structure_analyzer_task_class',
			// 8) Snippet extractor
			'crawler_snippet_extractor'    => 'crawler_snippet_extractor_task_class',
			// 9) Ping
			'crawler_ping'                 => 'crawler_ping_task_class',
		);

		// Create each fixed task from its configured class and register it
		foreach ($fixedTasks as $taskName => $configEntry) {
			$taskHandler->registerTask($taskName, ObjectFactory::createObjectByConfiguredName($configEntry));
		}

		// 10) URL sources: one task per stack name from the colon-separated 'crawler_url_stacks' entry
		$stackNames = explode(':', $this->getConfigInstance()->getConfigEntry('crawler_url_stacks'));
		foreach ($stackNames as $stackName) {
			// Task name and configuration entry share the same prefix
			$sourceTaskName = 'crawler_url_source_' . $stackName;
			$sourceTask     = ObjectFactory::createObjectByConfiguredName($sourceTaskName . '_task_class');

			$taskHandler->registerTask($sourceTaskName, $sourceTask);
		}

		// 11) Uploaded list scanner (checks for wanted files)
		$taskHandler->registerTask(
			'crawler_uploaded_list_scanner',
			ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class')
		);

		// Finally make the fully populated task handler available through the registry
		Registry::getRegistry()->addInstance('task_handler', $taskHandler);
	}
}