]> git.mxchange.org Git - hub.git/blob - application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
4ba0ed0b178ef9b93da8490fe0038aa423281d76
[hub.git] / application / hub / main / filter / task / crawler / class_CrawlerTaskHandlerInitializerFilter.php
1 <?php
2 /**
3  * A TaskHandlerInitializer filter for crawler
4  *
5  * @author              Roland Haeder <webmaster@shipsimu.org>
6  * @version             0.0.0
7  * @copyright   Copyright (c) 2014 Crawler Developer Team
8  * @license             GNU GPL 3.0 or any newer version
9  * @link                http://www.shipsimu.org
10  *
11  * This program is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation, either version 3 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program. If not, see <http://www.gnu.org/licenses/>.
23  */
class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements Filterable {
	/**
	 * Protected constructor; instances are obtained through the static
	 * factory method createCrawlerTaskHandlerInitializerFilter().
	 *
	 * @return	void
	 */
	protected function __construct () {
		// Hand this class' name over to the parent constructor
		parent::__construct(__CLASS__);
	}

	/**
	 * Factory method: builds and returns a fresh instance of this filter.
	 *
	 * @return	$filterInstance		An instance of this filter class
	 */
	public static final function createCrawlerTaskHandlerInitializerFilter () {
		// Inside this class "self" is CrawlerTaskHandlerInitializerFilter
		return new self();
	}

	/**
	 * Executes the filter: creates the configured task handler, registers
	 * all crawler tasks at it and finally stores the handler in the
	 * registry under the key 'task_handler'.
	 *
	 * Neither the request nor the response instance is read or modified by
	 * the code in this method.
	 *
	 * @param	$requestInstance	An instance of a class with an Requestable interface
	 * @param	$responseInstance	An instance of a class with an Responseable interface
	 * @return	void
	 * @throws	FilterChainException	If the filter chain needs to be interrupted
	 *					(not thrown directly by the code in this method)
	 * @todo	10% done
	 */
	public function execute (Requestable $requestInstance, Responseable $responseInstance) {
		/*
		 * All tasks to register, in registration order:
		 *
		 * task name (as passed to registerTask()) => configuration entry
		 * holding the task's class name
		 */
		$taskConfigEntries = array(
			// 1) "Communicator" for crawler->node communication
			'crawler_node_communicator'        => 'crawler_node_communicator_task_class',
			// 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack)
			'crawler_local_url_getter'         => 'crawler_local_url_getter_task_class',
			// 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index)
			'crawler_remote_url_getter'        => 'crawler_remote_url_getter_task_class',
			// 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
			'crawler_remote_job_publisher'     => 'crawler_remote_job_publisher_task_class',
			// 5) MIME sniffer
			'crawler_mime_sniffer'             => 'crawler_mime_sniffer_task_class',
			// 6) Document parser (converts document to meta format)
			'crawler_document_parser'          => 'crawler_document_parser_task_class',
			// 7) Document structure analyzer
			'crawler_structure_analyzer'       => 'crawler_structure_analyzer_task_class',
			// 8) Snippet extractor
			'crawler_snippet_extractor'        => 'crawler_snippet_extractor_task_class',
			// 9) Node ping
			'crawler_ping'                     => 'crawler_ping_task_class',
			// 10) URL source: local start
			'crawler_url_source_local_start'   => 'crawler_url_source_local_start_task_class',
			// 11) URL source: uploaded list
			'crawler_url_source_uploaded_list' => 'crawler_url_source_uploaded_list_task_class',
			// 12) URL source: RSS feed
			'crawler_url_source_rss_start'     => 'crawler_url_source_rss_start_task_class',
			// 13) URL source: found RSS/ATOM feed
			'crawler_url_source_found_rss'     => 'crawler_url_source_found_rss_task_class',
		);

		// The handler is created first, before any task gets instantiated
		$taskHandler = ObjectFactory::createObjectByConfiguredName('task_handler_class');

		// Instantiate each task right before registering it, one after another
		foreach ($taskConfigEntries as $taskName => $configEntry) {
			$taskHandler->registerTask($taskName, ObjectFactory::createObjectByConfiguredName($configEntry));
		}

		// Make the fully populated task handler available through the registry
		Registry::getRegistry()->addInstance('task_handler', $taskHandler);
	}
}
124
125 // [EOF]
126 ?>