From: Roland Haeder Date: Sun, 4 May 2014 12:31:38 +0000 (+0200) Subject: Renamed fetcher -> getter + added task for remote-crawl jobs. X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=a5f1bd1c1a8fa9b506492e92b910947c21891c89;p=hub.git Renamed fetcher -> getter + added task for remote-crawl jobs. Signed-off-by: Roland Haeder --- diff --git a/application/hub/config.php b/application/hub/config.php index 4b53d4697..da3c26053 100644 --- a/application/hub/config.php +++ b/application/hub/config.php @@ -1229,29 +1229,41 @@ $cfg->setConfigEntry('task_crawler_node_communicator_interval_delay', 250); // CFG: TASK-CRAWLER-NODE-COMMUNICATOR-MAX-RUNS $cfg->setConfigEntry('task_crawler_node_communicator_max_runs', 0); -// CFG: CRAWLER-LOCAL-URL-FETCHER-TASK-CLASS -$cfg->setConfigEntry('crawler_local_url_fetcher_task_class', 'CrawlerLocalUrlFetcherTask'); +// CFG: CRAWLER-LOCAL-URL-GETTER-TASK-CLASS +$cfg->setConfigEntry('crawler_local_url_getter_task_class', 'CrawlerLocalUrlGetterTask'); -// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-STARTUP-DELAY -$cfg->setConfigEntry('task_crawler_local_url_fetcher_startup_delay', 1500); +// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-STARTUP-DELAY +$cfg->setConfigEntry('task_crawler_local_url_getter_startup_delay', 1500); -// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-INTERVAL-DELAY -$cfg->setConfigEntry('task_crawler_local_url_fetcher_interval_delay', 50); +// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-INTERVAL-DELAY +$cfg->setConfigEntry('task_crawler_local_url_getter_interval_delay', 50); -// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-MAX-RUNS -$cfg->setConfigEntry('task_crawler_local_url_fetcher_max_runs', 0); +// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-MAX-RUNS +$cfg->setConfigEntry('task_crawler_local_url_getter_max_runs', 0); -// CFG: CRAWLER-REMOTE-URL-FETCHER-TASK-CLASS -$cfg->setConfigEntry('crawler_remote_url_fetcher_task_class', 'CrawlerRemoteUrlFetcherTask'); +// CFG: CRAWLER-REMOTE-URL-GETTER-TASK-CLASS +$cfg->setConfigEntry('crawler_remote_url_getter_task_class', 'CrawlerRemoteUrlGetterTask'); -// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-STARTUP-DELAY -$cfg->setConfigEntry('task_crawler_remote_url_fetcher_startup_delay', 1500); +// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-STARTUP-DELAY +$cfg->setConfigEntry('task_crawler_remote_url_getter_startup_delay', 1500); -// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-INTERVAL-DELAY -$cfg->setConfigEntry('task_crawler_remote_url_fetcher_interval_delay', 50); +// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-INTERVAL-DELAY +$cfg->setConfigEntry('task_crawler_remote_url_getter_interval_delay', 50); -// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-MAX-RUNS -$cfg->setConfigEntry('task_crawler_remote_url_fetcher_max_runs', 0); +// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-MAX-RUNS +$cfg->setConfigEntry('task_crawler_remote_url_getter_max_runs', 0); + +// CFG: CRAWLER-REMOTE-JOB-PUBLISHER-TASK-CLASS +$cfg->setConfigEntry('crawler_remote_job_publisher_task_class', 'CrawlerRemoteJobPublisherTask'); + +// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-STARTUP-DELAY +$cfg->setConfigEntry('task_crawler_remote_job_publisher_startup_delay', 2000); + +// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-INTERVAL-DELAY +$cfg->setConfigEntry('task_crawler_remote_job_publisher_interval_delay', 50); + +// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-MAX-RUNS +$cfg->setConfigEntry('task_crawler_remote_job_publisher_max_runs', 0); /////////////////////////////////////////////////////////////////////////////// // HTTP Configuration diff --git a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php index 799f9b60a..43a7be491 100644 --- a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php @@ -52,7 +52,7 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F * @param $responseInstance An instance of a class with an Responseable interface * @return void * @throws FilterChainException If the filter chain needs to be interrupted - * @todo 5% done + * @todo 10% done */ public function execute (Requestable $requestInstance, Responseable $responseInstance) { // Get crawler instance @@ -69,13 +69,17 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class'); $handlerInstance->registerTask('crawler_node_communicator', $taskInstance); - // 2) Local URL fetcher (fetches URLs locally and adds them to the analyzer's input stack) - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_fetcher_task_class'); - $handlerInstance->registerTask('crawler_local_url_fetcher', $taskInstance); + // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class'); + $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance); - // 3) Remote URL fetcher (let fetch URLs by other crawler nodes and also adds them to the stack) - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_fetcher_task_class'); - $handlerInstance->registerTask('crawler_remote_url_fetcher', $taskInstance); + // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class'); + $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance); + + // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class'); + $handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance); // Put the task handler in registry Registry::getRegistry()->addInstance('task_handler', $handlerInstance); diff --git a/application/hub/main/tasks/crawler/publisher/.htaccess b/application/hub/main/tasks/crawler/publisher/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/tasks/crawler/publisher/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php b/application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php new file mode 100644 index 000000000..b36c9d770 --- /dev/null +++ b/application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php @@ -0,0 +1,72 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerRemoteJobPublisherTask extends BaseTask implements Taskable, Visitable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $taskInstance An instance of a Visitable class + */ + public final static function createCrawlerRemoteJobPublisherTask () { + // Get new instance + $taskInstance = new CrawlerRemoteJobPublisherTask(); + + // Return the prepared instance + return $taskInstance; + } + + /** + * Accepts the visitor to process the visitor + * + * @param $visitorInstance An instance of a Visitor class + * @return void + * @todo Maybe visit some sub-objects + */ + public function accept (Visitor $visitorInstance) { + // Visit this task + $visitorInstance->visitTask($this); + } + + /** + * Executes the task + * + * @return void + * @todo 0% + */ + public function executeTask () { + $this->partialStub('Unimplemented task.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/tasks/crawler/url_fetcher/.htaccess b/application/hub/main/tasks/crawler/url_fetcher/.htaccess deleted file mode 100644 index 3a4288278..000000000 --- a/application/hub/main/tasks/crawler/url_fetcher/.htaccess +++ /dev/null @@ -1 +0,0 @@ -Deny from all diff --git a/application/hub/main/tasks/crawler/url_fetcher/local/.htaccess b/application/hub/main/tasks/crawler/url_fetcher/local/.htaccess deleted file mode 100644 index 3a4288278..000000000 --- a/application/hub/main/tasks/crawler/url_fetcher/local/.htaccess +++ /dev/null @@ -1 +0,0 @@ -Deny from all diff --git a/application/hub/main/tasks/crawler/url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php b/application/hub/main/tasks/crawler/url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php deleted file mode 100644 index 2f4b43fb6..000000000 --- a/application/hub/main/tasks/crawler/url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php +++ /dev/null @@ -1,72 +0,0 @@ - - * @version 0.0.0 - * @copyright Copyright (c) 2014 Crawler Developer Team - * @license GNU GPL 3.0 or any newer version - * @link http://www.ship-simu.org - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -class CrawlerLocalUrlFetcherTask extends BaseTask implements Taskable, Visitable { - /** - * Protected constructor - * - * @return void - */ - protected function __construct () { - // Call parent constructor - parent::__construct(__CLASS__); - } - - /** - * Creates an instance of this class - * - * @return $taskInstance An instance of a Visitable class - */ - public final static function createCrawlerLocalUrlFetcherTask () { - // Get new instance - $taskInstance = new CrawlerLocalUrlFetcherTask(); - - // Return the prepared instance - return $taskInstance; - } - - /** - * Accepts the visitor to process the visitor - * - * @param $visitorInstance An instance of a Visitor class - * @return void - * @todo Maybe visit some sub-objects - */ - public function accept (Visitor $visitorInstance) { - // Visit this task - $visitorInstance->visitTask($this); - } - - /** - * Executes the task - * - * @return void - * @todo 0% - */ - public function executeTask () { - $this->partialStub('Unimplemented task.'); - } -} - -// [EOF] -?> diff --git a/application/hub/main/tasks/crawler/url_fetcher/remote/.htaccess b/application/hub/main/tasks/crawler/url_fetcher/remote/.htaccess deleted file mode 100644 index 3a4288278..000000000 --- a/application/hub/main/tasks/crawler/url_fetcher/remote/.htaccess +++ /dev/null @@ -1 +0,0 @@ -Deny from all diff --git a/application/hub/main/tasks/crawler/url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php b/application/hub/main/tasks/crawler/url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php deleted file mode 100644 index 57d3009e9..000000000 --- a/application/hub/main/tasks/crawler/url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php +++ /dev/null @@ -1,72 +0,0 @@ - - * @version 0.0.0 - * @copyright Copyright (c) 2014 Crawler Developer Team - * @license GNU GPL 3.0 or any newer version - * @link http://www.ship-simu.org - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -class CrawlerRemoteUrlFetcherTask extends BaseTask implements Taskable, Visitable { - /** - * Protected constructor - * - * @return void - */ - protected function __construct () { - // Call parent constructor - parent::__construct(__CLASS__); - } - - /** - * Creates an instance of this class - * - * @return $taskInstance An instance of a Visitable class - */ - public final static function createCrawlerRemoteUrlFetcherTask () { - // Get new instance - $taskInstance = new CrawlerRemoteUrlFetcherTask(); - - // Return the prepared instance - return $taskInstance; - } - - /** - * Accepts the visitor to process the visitor - * - * @param $visitorInstance An instance of a Visitor class - * @return void - * @todo Maybe visit some sub-objects - */ - public function accept (Visitor $visitorInstance) { - // Visit this task - $visitorInstance->visitTask($this); - } - - /** - * Executes the task - * - * @return void - * @todo 0% - */ - public function executeTask () { - $this->partialStub('Unimplemented task.'); - } -} - -// [EOF] -?> diff --git a/application/hub/main/tasks/crawler/url_getter/.htaccess b/application/hub/main/tasks/crawler/url_getter/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/tasks/crawler/url_getter/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/tasks/crawler/url_getter/local/.htaccess b/application/hub/main/tasks/crawler/url_getter/local/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/tasks/crawler/url_getter/local/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php b/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php new file mode 100644 index 000000000..e3ecc6c9c --- /dev/null +++ b/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php @@ -0,0 +1,72 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerLocalUrlGetterTask extends BaseTask implements Taskable, Visitable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $taskInstance An instance of a Visitable class + */ + public final static function createCrawlerLocalUrlGetterTask () { + // Get new instance + $taskInstance = new CrawlerLocalUrlGetterTask(); + + // Return the prepared instance + return $taskInstance; + } + + /** + * Accepts the visitor to process the visitor + * + * @param $visitorInstance An instance of a Visitor class + * @return void + * @todo Maybe visit some sub-objects + */ + public function accept (Visitor $visitorInstance) { + // Visit this task + $visitorInstance->visitTask($this); + } + + /** + * Executes the task + * + * @return void + * @todo 0% + */ + public function executeTask () { + $this->partialStub('Unimplemented task.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/tasks/crawler/url_getter/remote/.htaccess b/application/hub/main/tasks/crawler/url_getter/remote/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/tasks/crawler/url_getter/remote/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php b/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php new file mode 100644 index 000000000..9dc5b9d88 --- /dev/null +++ b/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php @@ -0,0 +1,72 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerRemoteUrlGetterTask extends BaseTask implements Taskable, Visitable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $taskInstance An instance of a Visitable class + */ + public final static function createCrawlerRemoteUrlGetterTask () { + // Get new instance + $taskInstance = new CrawlerRemoteUrlGetterTask(); + + // Return the prepared instance + return $taskInstance; + } + + /** + * Accepts the visitor to process the visitor + * + * @param $visitorInstance An instance of a Visitor class + * @return void + * @todo Maybe visit some sub-objects + */ + public function accept (Visitor $visitorInstance) { + // Visit this task + $visitorInstance->visitTask($this); + } + + /** + * Executes the task + * + * @return void + * @todo 0% + */ + public function executeTask () { + $this->partialStub('Unimplemented task.'); + } +} + +// [EOF] +?>