]> git.mxchange.org Git - hub.git/blob
a9d1d99763d161ff5095b5538d49866668c46b9c
[hub.git] /
1 <?php
2 // Own namespace
3 namespace Org\Shipsimu\Hub\Filter\Initializer\Crawler\Handler\Task;
4
5 // Import application-specific stuff
6 use Org\Shipsimu\Hub\Crawler\Filter\Task\BaseCrawlerTaskFilter;
7
8 // Import framework stuff
9 use Org\Mxchange\CoreFramework\Bootstrap\FrameworkBootstrap;
10 use Org\Mxchange\CoreFramework\Factory\Object\ObjectFactory;
11 use Org\Mxchange\CoreFramework\Filter\Filterable;
12 use Org\Mxchange\CoreFramework\Registry\GenericRegistry;
13 use Org\Mxchange\CoreFramework\Request\Requestable;
14 use Org\Mxchange\CoreFramework\Response\Responseable;
15
16 /**
17  * A TaskHandlerInitializer filter for crawler
18  *
19  * @author              Roland Haeder <webmaster@shipsimu.org>
20  * @version             0.0.0
21  * @copyright   Copyright (c) 2014 - 2020 Crawler Developer Team
22  * @license             GNU GPL 3.0 or any newer version
23  * @link                http://www.shipsimu.org
24  *
25  * This program is free software: you can redistribute it and/or modify
26  * it under the terms of the GNU General Public License as published by
27  * the Free Software Foundation, either version 3 of the License, or
28  * (at your option) any later version.
29  *
30  * This program is distributed in the hope that it will be useful,
31  * but WITHOUT ANY WARRANTY; without even the implied warranty of
32  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
33  * GNU General Public License for more details.
34  *
35  * You should have received a copy of the GNU General Public License
36  * along with this program. If not, see <http://www.gnu.org/licenses/>.
37  */
class CrawlerDistributedTaskHandlerInitializerFilter extends BaseCrawlerTaskFilter implements Filterable {
        /**
         * Private constructor, instances are obtained through the static
         * factory method of this class.
         *
         * @return      void
         */
        private function __construct () {
                // Hand this class' name over to the parent constructor
                parent::__construct(__CLASS__);
        }

        /**
         * Factory method: creates a fresh instance of this filter class
         *
         * @return      $filterInstance         An instance of this filter class
         */
        public static final function createCrawlerDistributedTaskHandlerInitializerFilter () {
                // Create and directly return the new instance
                return new self();
        }

        /**
         * Executes the filter with given request and response objects. After
         * the parent's execute() has been invoked, a fixed set of crawler
         * tasks plus one task per configured URL stack is created through the
         * object factory and registered at the task handler taken from the
         * generic registry.
         *
         * @param       $requestInstance        An instance of a class with an Requestable interface
         * @param       $responseInstance       An instance of a class with an Responseable interface
         * @return      void
         * @throws      FilterChainException    If the filter chain needs to be interrupted (not thrown
         *                                      by this method directly, presumably by invoked code)
         * @todo        10% done
         */
        public function execute (Requestable $requestInstance, Responseable $responseInstance) {
                // The parent's implementation always runs first
                parent::execute($requestInstance, $responseInstance);

                // Task handler all tasks below get registered at
                $handlerInstance = GenericRegistry::getRegistry()->getInstance('task_handler');

                /*
                 * Fixed tasks of a distributed crawler, mapping the task's name
                 * to the configuration entry holding its class. The order of
                 * this list is the order of creation and registration.
                 */
                $fixedTasks = [
                        // 1) "Communicator" for crawler->node communication
                        'crawler_node_communicator'    => 'crawler_node_communicator_task_class',
                        // 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
                        'crawler_local_url_crawler'    => 'crawler_local_url_crawler_task_class',
                        // 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index)
                        'crawler_remote_url_crawler'   => 'crawler_remote_url_crawler_task_class',
                        // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
                        'crawler_remote_job_publisher' => 'crawler_remote_job_publisher_task_class',
                        // 5) Node ping
                        'crawler_ping'                 => 'crawler_ping_task_class',
                ];

                // Create each task first, then register it under its name
                foreach ($fixedTasks as $taskName => $configEntry) {
                        $createdTask = ObjectFactory::createObjectByConfiguredName($configEntry);
                        $handlerInstance->registerTask($taskName, $createdTask);
                }

                // 6) URL sources: the configuration entry is a colon-separated list of stack names
                $stackNames = explode(':', FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('crawler_url_stacks'));

                foreach ($stackNames as $stackName) {
                        // Task name and configuration entry both derive from the stack's name
                        $sourceTaskName = 'crawler_url_source_' . $stackName;

                        // Create the URL source task and register it
                        $sourceTask = ObjectFactory::createObjectByConfiguredName($sourceTaskName . '_task_class');
                        $handlerInstance->registerTask($sourceTaskName, $sourceTask);
                }
        }

}