From a5f1bd1c1a8fa9b506492e92b910947c21891c89 Mon Sep 17 00:00:00 2001 From: Roland Haeder Date: Sun, 4 May 2014 14:31:38 +0200 Subject: [PATCH] Renamed fetcher -> getter + added task for remote-crawl jobs. Signed-off-by: Roland Haeder --- application/hub/config.php | 44 +++++++----- ...ss_CrawlerTaskHandlerInitializerFilter.php | 18 +++-- .../{url_fetcher => publisher}/.htaccess | 0 .../class_CrawlerRemoteJobPublisherTask.php | 72 +++++++++++++++++++ .../local => url_getter}/.htaccess | 0 .../remote => url_getter/local}/.htaccess | 0 .../class_CrawlerLocalUrlGetterTask.php} | 8 +-- .../tasks/crawler/url_getter/remote/.htaccess | 1 + .../class_CrawlerRemoteUrlGetterTask.php} | 8 +-- 9 files changed, 120 insertions(+), 31 deletions(-) rename application/hub/main/tasks/crawler/{url_fetcher => publisher}/.htaccess (100%) create mode 100644 application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php rename application/hub/main/tasks/crawler/{url_fetcher/local => url_getter}/.htaccess (100%) rename application/hub/main/tasks/crawler/{url_fetcher/remote => url_getter/local}/.htaccess (100%) rename application/hub/main/tasks/crawler/{url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php => url_getter/local/class_CrawlerLocalUrlGetterTask.php} (87%) create mode 100644 application/hub/main/tasks/crawler/url_getter/remote/.htaccess rename application/hub/main/tasks/crawler/{url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php => url_getter/remote/class_CrawlerRemoteUrlGetterTask.php} (88%) diff --git a/application/hub/config.php b/application/hub/config.php index 4b53d4697..da3c26053 100644 --- a/application/hub/config.php +++ b/application/hub/config.php @@ -1229,29 +1229,41 @@ $cfg->setConfigEntry('task_crawler_node_communicator_interval_delay', 250); // CFG: TASK-CRAWLER-NODE-COMMUNICATOR-MAX-RUNS $cfg->setConfigEntry('task_crawler_node_communicator_max_runs', 0); -// CFG: CRAWLER-LOCAL-URL-FETCHER-TASK-CLASS -$cfg->setConfigEntry('crawler_local_url_fetcher_task_class', 'CrawlerLocalUrlFetcherTask'); +// CFG: CRAWLER-LOCAL-URL-GETTER-TASK-CLASS +$cfg->setConfigEntry('crawler_local_url_getter_task_class', 'CrawlerLocalUrlGetterTask'); -// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-STARTUP-DELAY -$cfg->setConfigEntry('task_crawler_local_url_fetcher_startup_delay', 1500); +// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-STARTUP-DELAY +$cfg->setConfigEntry('task_crawler_local_url_getter_startup_delay', 1500); -// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-INTERVAL-DELAY -$cfg->setConfigEntry('task_crawler_local_url_fetcher_interval_delay', 50); +// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-INTERVAL-DELAY +$cfg->setConfigEntry('task_crawler_local_url_getter_interval_delay', 50); -// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-MAX-RUNS -$cfg->setConfigEntry('task_crawler_local_url_fetcher_max_runs', 0); +// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-MAX-RUNS +$cfg->setConfigEntry('task_crawler_local_url_getter_max_runs', 0); -// CFG: CRAWLER-REMOTE-URL-FETCHER-TASK-CLASS -$cfg->setConfigEntry('crawler_remote_url_fetcher_task_class', 'CrawlerRemoteUrlFetcherTask'); +// CFG: CRAWLER-REMOTE-URL-GETTER-TASK-CLASS +$cfg->setConfigEntry('crawler_remote_url_getter_task_class', 'CrawlerRemoteUrlGetterTask'); -// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-STARTUP-DELAY -$cfg->setConfigEntry('task_crawler_remote_url_fetcher_startup_delay', 1500); +// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-STARTUP-DELAY +$cfg->setConfigEntry('task_crawler_remote_url_getter_startup_delay', 1500); -// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-INTERVAL-DELAY -$cfg->setConfigEntry('task_crawler_remote_url_fetcher_interval_delay', 50); +// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-INTERVAL-DELAY +$cfg->setConfigEntry('task_crawler_remote_url_getter_interval_delay', 50); -// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-MAX-RUNS -$cfg->setConfigEntry('task_crawler_remote_url_fetcher_max_runs', 0); +// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-MAX-RUNS +$cfg->setConfigEntry('task_crawler_remote_url_getter_max_runs', 0); + +// CFG: CRAWLER-REMOTE-JOB-PUBLISHER-TASK-CLASS +$cfg->setConfigEntry('crawler_remote_job_publisher_task_class', 'CrawlerRemoteJobPublisherTask'); + +// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-STARTUP-DELAY +$cfg->setConfigEntry('task_crawler_remote_job_publisher_startup_delay', 2000); + +// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-INTERVAL-DELAY +$cfg->setConfigEntry('task_crawler_remote_job_publisher_interval_delay', 50); + +// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-MAX-RUNS +$cfg->setConfigEntry('task_crawler_remote_job_publisher_max_runs', 0); /////////////////////////////////////////////////////////////////////////////// // HTTP Configuration diff --git a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php index 799f9b60a..43a7be491 100644 --- a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php @@ -52,7 +52,7 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F * @param $responseInstance An instance of a class with an Responseable interface * @return void * @throws FilterChainException If the filter chain needs to be interrupted - * @todo 5% done + * @todo 10% done */ public function execute (Requestable $requestInstance, Responseable $responseInstance) { // Get crawler instance @@ -69,13 +69,17 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class'); $handlerInstance->registerTask('crawler_node_communicator', $taskInstance); - // 2) Local URL fetcher (fetches URLs locally and adds them to the analyzer's input stack) - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_fetcher_task_class'); - $handlerInstance->registerTask('crawler_local_url_fetcher', $taskInstance); + // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class'); + $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance); - // 3) Remote URL fetcher (let fetch URLs by other crawler nodes and also adds them to the stack) - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_fetcher_task_class'); - $handlerInstance->registerTask('crawler_remote_url_fetcher', $taskInstance); + // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class'); + $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance); + + // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class'); + $handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance); // Put the task handler in registry Registry::getRegistry()->addInstance('task_handler', $handlerInstance); diff --git a/application/hub/main/tasks/crawler/url_fetcher/.htaccess b/application/hub/main/tasks/crawler/publisher/.htaccess similarity index 100% rename from application/hub/main/tasks/crawler/url_fetcher/.htaccess rename to application/hub/main/tasks/crawler/publisher/.htaccess diff --git a/application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php b/application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php new file mode 100644 index 000000000..b36c9d770 --- /dev/null +++ b/application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php @@ -0,0 +1,72 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerRemoteJobPublisherTask extends BaseTask implements Taskable, Visitable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $taskInstance An instance of a Visitable class + */ + public final static function createCrawlerRemoteJobPublisherTask () { + // Get new instance + $taskInstance = new CrawlerRemoteJobPublisherTask(); + + // Return the prepared instance + return $taskInstance; + } + + /** + * Accepts the visitor to process the visitor + * + * @param $visitorInstance An instance of a Visitor class + * @return void + * @todo Maybe visit some sub-objects + */ + public function accept (Visitor $visitorInstance) { + // Visit this task + $visitorInstance->visitTask($this); + } + + /** + * Executes the task + * + * @return void + * @todo 0% + */ + public function executeTask () { + $this->partialStub('Unimplemented task.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/tasks/crawler/url_fetcher/local/.htaccess b/application/hub/main/tasks/crawler/url_getter/.htaccess similarity index 100% rename from application/hub/main/tasks/crawler/url_fetcher/local/.htaccess rename to application/hub/main/tasks/crawler/url_getter/.htaccess diff --git a/application/hub/main/tasks/crawler/url_fetcher/remote/.htaccess b/application/hub/main/tasks/crawler/url_getter/local/.htaccess similarity index 100% rename from application/hub/main/tasks/crawler/url_fetcher/remote/.htaccess rename to application/hub/main/tasks/crawler/url_getter/local/.htaccess diff --git a/application/hub/main/tasks/crawler/url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php b/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php similarity index 87% rename from application/hub/main/tasks/crawler/url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php rename to application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php index 57d3009e9..e3ecc6c9c 100644 --- a/application/hub/main/tasks/crawler/url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php +++ b/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php @@ -1,6 +1,6 @@ * @version 0.0.0 @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -class CrawlerRemoteUrlFetcherTask extends BaseTask implements Taskable, Visitable { +class CrawlerLocalUrlGetterTask extends BaseTask implements Taskable, Visitable { /** * Protected constructor * @@ -37,9 +37,9 @@ class CrawlerRemoteUrlFetcherTask extends BaseTask implements Taskable, Visitabl * * @return $taskInstance An instance of a Visitable class */ - public final static function createCrawlerRemoteUrlFetcherTask () { + public final static function createCrawlerLocalUrlGetterTask () { // Get new instance - $taskInstance = new CrawlerRemoteUrlFetcherTask(); + $taskInstance = new CrawlerLocalUrlGetterTask(); // Return the prepared instance return $taskInstance; diff --git a/application/hub/main/tasks/crawler/url_getter/remote/.htaccess b/application/hub/main/tasks/crawler/url_getter/remote/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/tasks/crawler/url_getter/remote/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/tasks/crawler/url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php b/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php similarity index 88% rename from application/hub/main/tasks/crawler/url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php rename to application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php index 2f4b43fb6..9dc5b9d88 100644 --- a/application/hub/main/tasks/crawler/url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php +++ b/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php @@ -1,6 +1,6 @@ * @version 0.0.0 @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -class CrawlerLocalUrlFetcherTask extends BaseTask implements Taskable, Visitable { +class CrawlerRemoteUrlGetterTask extends BaseTask implements Taskable, Visitable { /** * Protected constructor * @@ -37,9 +37,9 @@ class CrawlerLocalUrlFetcherTask extends BaseTask implements Taskable, Visitable * * @return $taskInstance An instance of a Visitable class */ - public final static function createCrawlerLocalUrlFetcherTask () { + public final static function createCrawlerRemoteUrlGetterTask () { // Get new instance - $taskInstance = new CrawlerLocalUrlFetcherTask(); + $taskInstance = new CrawlerRemoteUrlGetterTask(); // Return the prepared instance return $taskInstance; -- 2.39.5