3 namespace Org\Shipsimu\Hub\Filter\Initializer\Crawler\Handler\Task;
5 // Import application-specific stuff
6 use Org\Shipsimu\Hub\Crawler\Filter\Task\BaseCrawlerTaskFilter;
8 // Import framework stuff
9 use Org\Mxchange\CoreFramework\Bootstrap\FrameworkBootstrap;
10 use Org\Mxchange\CoreFramework\Factory\Object\ObjectFactory;
11 use Org\Mxchange\CoreFramework\Filter\Filterable;
12 use Org\Mxchange\CoreFramework\Registry\GenericRegistry;
13 use Org\Mxchange\CoreFramework\Request\Requestable;
14 use Org\Mxchange\CoreFramework\Response\Responseable;
17 * A TaskHandlerInitializer filter for crawler
19 * @author Roland Haeder <webmaster@shipsimu.org>
21 * @copyright Copyright (c) 2014 - 2020 Crawler Developer Team
22 * @license GNU GPL 3.0 or any newer version
23 * @link http://www.shipsimu.org
25 * This program is free software: you can redistribute it and/or modify
26 * it under the terms of the GNU General Public License as published by
27 * the Free Software Foundation, either version 3 of the License, or
28 * (at your option) any later version.
30 * This program is distributed in the hope that it will be useful,
31 * but WITHOUT ANY WARRANTY; without even the implied warranty of
32 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 * GNU General Public License for more details.
35 * You should have received a copy of the GNU General Public License
36 * along with this program. If not, see <http://www.gnu.org/licenses/>.
class CrawlerDistributedTaskHandlerInitializerFilter extends BaseCrawlerTaskFilter implements Filterable {
	/**
	 * Private constructor; instances are obtained through the static factory
	 * method createCrawlerDistributedTaskHandlerInitializerFilter().
	 *
	 * @return	void
	 */
	private function __construct () {
		// Call parent constructor
		parent::__construct(__CLASS__);
	}

	/**
	 * Creates an instance of this filter class
	 *
	 * @return	CrawlerDistributedTaskHandlerInitializerFilter	An instance of this filter class
	 */
	public static final function createCrawlerDistributedTaskHandlerInitializerFilter () {
		// Get a new instance and return it
		return new CrawlerDistributedTaskHandlerInitializerFilter();
	}

	/**
	 * Executes the filter with given request and response objects: fetches
	 * the 'task_handler' instance from the generic registry and registers
	 * all tasks of a distributed crawler with it.
	 *
	 * @param	$requestInstance	An instance of a class with an Requestable interface
	 * @param	$responseInstance	An instance of a class with an Responseable interface
	 * @return	void
	 * @throws	FilterChainException	If the filter chain needs to be interrupted
	 *									(not thrown directly by this method's own code)
	 */
	public function execute (Requestable $requestInstance, Responseable $responseInstance) {
		// Run parent method first
		parent::execute($requestInstance, $responseInstance);

		// Get task handler instance
		$handlerInstance = GenericRegistry::getRegistry()->getInstance('task_handler');

		/*
		 * Register all fixed tasks for a distributed crawler, in this order.
		 * Each task's class is looked up under the config key
		 * '<task name>_task_class'.
		 */
		$taskNames = [
			// 1) "Communicator" for crawler->node communication
			'crawler_node_communicator',
			// 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
			'crawler_local_url_crawler',
			// 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index)
			'crawler_remote_url_crawler',
			// 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
			'crawler_remote_job_publisher',
			// 5) Ping task
			'crawler_ping',
		];

		foreach ($taskNames as $taskName) {
			$this->registerConfiguredTask($handlerInstance, $taskName);
		}

		// 6) One URL source task per stack name; the names are stored
		//    colon-separated in config entry 'crawler_url_stacks'
		$stackNames = explode(':', FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('crawler_url_stacks'));

		foreach ($stackNames as $stack) {
			$this->registerConfiguredTask($handlerInstance, 'crawler_url_source_' . $stack);
		}
	}

	/**
	 * Creates the task object configured under '<task name>_task_class' and
	 * registers it at the given task handler under the task name itself.
	 *
	 * @param	$handlerInstance	The task handler instance taken from the registry
	 * @param	$taskName			Name to register the task under; also the prefix of its class config key
	 * @return	void
	 */
	private function registerConfiguredTask ($handlerInstance, $taskName) {
		// Init task instance
		$taskInstance = ObjectFactory::createObjectByConfiguredName($taskName . '_task_class');

		// And register it
		$handlerInstance->registerTask($taskName, $taskInstance);
	}
}