// CFG: TASK-CRAWLER-NODE-COMMUNICATOR-MAX-RUNS
$cfg->setConfigEntry('task_crawler_node_communicator_max_runs', 0);
-// CFG: CRAWLER-LOCAL-URL-FETCHER-TASK-CLASS
-$cfg->setConfigEntry('crawler_local_url_fetcher_task_class', 'CrawlerLocalUrlFetcherTask');
+// CFG: CRAWLER-LOCAL-URL-GETTER-TASK-CLASS
+$cfg->setConfigEntry('crawler_local_url_getter_task_class', 'CrawlerLocalUrlGetterTask');
-// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-STARTUP-DELAY
-$cfg->setConfigEntry('task_crawler_local_url_fetcher_startup_delay', 1500);
+// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_local_url_getter_startup_delay', 1500);
-// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-INTERVAL-DELAY
-$cfg->setConfigEntry('task_crawler_local_url_fetcher_interval_delay', 50);
+// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_local_url_getter_interval_delay', 50);
-// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-MAX-RUNS
-$cfg->setConfigEntry('task_crawler_local_url_fetcher_max_runs', 0);
+// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_local_url_getter_max_runs', 0);
-// CFG: CRAWLER-REMOTE-URL-FETCHER-TASK-CLASS
-$cfg->setConfigEntry('crawler_remote_url_fetcher_task_class', 'CrawlerRemoteUrlFetcherTask');
+// CFG: CRAWLER-REMOTE-URL-GETTER-TASK-CLASS
+$cfg->setConfigEntry('crawler_remote_url_getter_task_class', 'CrawlerRemoteUrlGetterTask');
-// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-STARTUP-DELAY
-$cfg->setConfigEntry('task_crawler_remote_url_fetcher_startup_delay', 1500);
+// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_remote_url_getter_startup_delay', 1500);
-// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-INTERVAL-DELAY
-$cfg->setConfigEntry('task_crawler_remote_url_fetcher_interval_delay', 50);
+// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_remote_url_getter_interval_delay', 50);
-// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-MAX-RUNS
-$cfg->setConfigEntry('task_crawler_remote_url_fetcher_max_runs', 0);
+// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_remote_url_getter_max_runs', 0);
+
+// CFG: CRAWLER-REMOTE-JOB-PUBLISHER-TASK-CLASS
+$cfg->setConfigEntry('crawler_remote_job_publisher_task_class', 'CrawlerRemoteJobPublisherTask');
+
+// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_remote_job_publisher_startup_delay', 2000);
+
+// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_remote_job_publisher_interval_delay', 50);
+
+// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_remote_job_publisher_max_runs', 0);
///////////////////////////////////////////////////////////////////////////////
// HTTP Configuration
* @param $responseInstance An instance of a class with an Responseable interface
* @return void
* @throws FilterChainException If the filter chain needs to be interrupted
- * @todo 5% done
+ * @todo 10% done
*/
public function execute (Requestable $requestInstance, Responseable $responseInstance) {
// Get crawler instance
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class');
$handlerInstance->registerTask('crawler_node_communicator', $taskInstance);
- // 2) Local URL fetcher (fetches URLs locally and adds them to the analyzer's input stack)
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_fetcher_task_class');
- $handlerInstance->registerTask('crawler_local_url_fetcher', $taskInstance);
+ // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class');
+ $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance);
- // 3) Remote URL fetcher (let fetch URLs by other crawler nodes and also adds them to the stack)
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_fetcher_task_class');
- $handlerInstance->registerTask('crawler_remote_url_fetcher', $taskInstance);
+ // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class');
+ $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance);
+
+ // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
+ $handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance);
// Put the task handler in registry
Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A RemoteJobPublisher task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerRemoteJobPublisherTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor. It cannot be called from outside the class
+	 * hierarchy; use createCrawlerRemoteJobPublisherTask() to obtain an
+	 * instance.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Pass this class' name on to the parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Creates an instance of this class. The instance is returned as-is,
+	 * no further setup is done here.
+	 *
+	 * @return	$taskInstance	A new instance of this class (implements Taskable and Visitable)
+	 */
+	public final static function createCrawlerRemoteJobPublisherTask () {
+		// Get new instance
+		$taskInstance = new CrawlerRemoteJobPublisherTask();
+
+		// Return the prepared instance
+		return $taskInstance;
+	}
+
+	/**
+	 * Accepts a visitor by handing this task over to the visitor's
+	 * visitTask() method.
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Let the visitor visit this task
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task. Currently a stub: it only calls partialStub()
+	 * with the message 'Unimplemented task.'.
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
+++ /dev/null
-Deny from all
+++ /dev/null
-Deny from all
+++ /dev/null
-<?php
-/**
- * A LocalUrlFetcher task for crawlers
- *
- * @author Roland Haeder <webmaster@ship-simu.org>
- * @version 0.0.0
- * @copyright Copyright (c) 2014 Crawler Developer Team
- * @license GNU GPL 3.0 or any newer version
- * @link http://www.ship-simu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-class CrawlerLocalUrlFetcherTask extends BaseTask implements Taskable, Visitable {
- /**
- * Protected constructor
- *
- * @return void
- */
- protected function __construct () {
- // Call parent constructor
- parent::__construct(__CLASS__);
- }
-
- /**
- * Creates an instance of this class
- *
- * @return $taskInstance An instance of a Visitable class
- */
- public final static function createCrawlerLocalUrlFetcherTask () {
- // Get new instance
- $taskInstance = new CrawlerLocalUrlFetcherTask();
-
- // Return the prepared instance
- return $taskInstance;
- }
-
- /**
- * Accepts the visitor to process the visitor
- *
- * @param $visitorInstance An instance of a Visitor class
- * @return void
- * @todo Maybe visit some sub-objects
- */
- public function accept (Visitor $visitorInstance) {
- // Visit this task
- $visitorInstance->visitTask($this);
- }
-
- /**
- * Executes the task
- *
- * @return void
- * @todo 0%
- */
- public function executeTask () {
- $this->partialStub('Unimplemented task.');
- }
-}
-
-// [EOF]
-?>
+++ /dev/null
-Deny from all
+++ /dev/null
-<?php
-/**
- * A RemoteUrlFetcher task for crawlers
- *
- * @author Roland Haeder <webmaster@ship-simu.org>
- * @version 0.0.0
- * @copyright Copyright (c) 2014 Crawler Developer Team
- * @license GNU GPL 3.0 or any newer version
- * @link http://www.ship-simu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-class CrawlerRemoteUrlFetcherTask extends BaseTask implements Taskable, Visitable {
- /**
- * Protected constructor
- *
- * @return void
- */
- protected function __construct () {
- // Call parent constructor
- parent::__construct(__CLASS__);
- }
-
- /**
- * Creates an instance of this class
- *
- * @return $taskInstance An instance of a Visitable class
- */
- public final static function createCrawlerRemoteUrlFetcherTask () {
- // Get new instance
- $taskInstance = new CrawlerRemoteUrlFetcherTask();
-
- // Return the prepared instance
- return $taskInstance;
- }
-
- /**
- * Accepts the visitor to process the visitor
- *
- * @param $visitorInstance An instance of a Visitor class
- * @return void
- * @todo Maybe visit some sub-objects
- */
- public function accept (Visitor $visitorInstance) {
- // Visit this task
- $visitorInstance->visitTask($this);
- }
-
- /**
- * Executes the task
- *
- * @return void
- * @todo 0%
- */
- public function executeTask () {
- $this->partialStub('Unimplemented task.');
- }
-}
-
-// [EOF]
-?>
--- /dev/null
+Deny from all
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A LocalUrlGetter task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerLocalUrlGetterTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor. It cannot be called from outside the class
+	 * hierarchy; use createCrawlerLocalUrlGetterTask() to obtain an
+	 * instance.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Pass this class' name on to the parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Creates an instance of this class. The instance is returned as-is,
+	 * no further setup is done here.
+	 *
+	 * @return	$taskInstance	A new instance of this class (implements Taskable and Visitable)
+	 */
+	public final static function createCrawlerLocalUrlGetterTask () {
+		// Get new instance
+		$taskInstance = new CrawlerLocalUrlGetterTask();
+
+		// Return the prepared instance
+		return $taskInstance;
+	}
+
+	/**
+	 * Accepts a visitor by handing this task over to the visitor's
+	 * visitTask() method.
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Let the visitor visit this task
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task. Currently a stub: it only calls partialStub()
+	 * with the message 'Unimplemented task.'.
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A RemoteUrlGetter task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerRemoteUrlGetterTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor. It cannot be called from outside the class
+	 * hierarchy; use createCrawlerRemoteUrlGetterTask() to obtain an
+	 * instance.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Pass this class' name on to the parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Creates an instance of this class. The instance is returned as-is,
+	 * no further setup is done here.
+	 *
+	 * @return	$taskInstance	A new instance of this class (implements Taskable and Visitable)
+	 */
+	public final static function createCrawlerRemoteUrlGetterTask () {
+		// Get new instance
+		$taskInstance = new CrawlerRemoteUrlGetterTask();
+
+		// Return the prepared instance
+		return $taskInstance;
+	}
+
+	/**
+	 * Accepts a visitor by handing this task over to the visitor's
+	 * visitTask() method.
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Let the visitor visit this task
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task. Currently a stub: it only calls partialStub()
+	 * with the message 'Unimplemented task.'.
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>