git.mxchange.org Git - hub.git/commitdiff
Renamed fetcher -> getter + added task for remote-crawl jobs.
author Roland Haeder <roland@mxchange.org>
Sun, 4 May 2014 12:31:38 +0000 (14:31 +0200)
committer Roland Haeder <roland@mxchange.org>
Sun, 4 May 2014 12:31:38 +0000 (14:31 +0200)
Signed-off-by: Roland Haeder <roland@mxchange.org>
14 files changed:
application/hub/config.php
application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
application/hub/main/tasks/crawler/publisher/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/url_fetcher/.htaccess [deleted file]
application/hub/main/tasks/crawler/url_fetcher/local/.htaccess [deleted file]
application/hub/main/tasks/crawler/url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php [deleted file]
application/hub/main/tasks/crawler/url_fetcher/remote/.htaccess [deleted file]
application/hub/main/tasks/crawler/url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php [deleted file]
application/hub/main/tasks/crawler/url_getter/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/url_getter/local/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/url_getter/remote/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php [new file with mode: 0644]

diff --git a/application/hub/config.php b/application/hub/config.php
index 4b53d4697587955539e9e55039a574e7ed616f70..da3c260537c4f6bdba4f6a2b1fb6fa8b2dc0cd7f 100644 (file)
@@ -1229,29 +1229,41 @@ $cfg->setConfigEntry('task_crawler_node_communicator_interval_delay', 250);
 // CFG: TASK-CRAWLER-NODE-COMMUNICATOR-MAX-RUNS
 $cfg->setConfigEntry('task_crawler_node_communicator_max_runs', 0);
 
-// CFG: CRAWLER-LOCAL-URL-FETCHER-TASK-CLASS
-$cfg->setConfigEntry('crawler_local_url_fetcher_task_class', 'CrawlerLocalUrlFetcherTask');
+// CFG: CRAWLER-LOCAL-URL-GETTER-TASK-CLASS
+$cfg->setConfigEntry('crawler_local_url_getter_task_class', 'CrawlerLocalUrlGetterTask');
 
-// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-STARTUP-DELAY
-$cfg->setConfigEntry('task_crawler_local_url_fetcher_startup_delay', 1500);
+// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_local_url_getter_startup_delay', 1500);
 
-// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-INTERVAL-DELAY
-$cfg->setConfigEntry('task_crawler_local_url_fetcher_interval_delay', 50);
+// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_local_url_getter_interval_delay', 50);
 
-// CFG: TASK-CRAWLER-LOCAL-URL-FETCHER-MAX-RUNS
-$cfg->setConfigEntry('task_crawler_local_url_fetcher_max_runs', 0);
+// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_local_url_getter_max_runs', 0);
 
-// CFG: CRAWLER-REMOTE-URL-FETCHER-TASK-CLASS
-$cfg->setConfigEntry('crawler_remote_url_fetcher_task_class', 'CrawlerRemoteUrlFetcherTask');
+// CFG: CRAWLER-REMOTE-URL-GETTER-TASK-CLASS
+$cfg->setConfigEntry('crawler_remote_url_getter_task_class', 'CrawlerRemoteUrlGetterTask');
 
-// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-STARTUP-DELAY
-$cfg->setConfigEntry('task_crawler_remote_url_fetcher_startup_delay', 1500);
+// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_remote_url_getter_startup_delay', 1500);
 
-// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-INTERVAL-DELAY
-$cfg->setConfigEntry('task_crawler_remote_url_fetcher_interval_delay', 50);
+// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_remote_url_getter_interval_delay', 50);
 
-// CFG: TASK-CRAWLER-REMOTE-URL-FETCHER-MAX-RUNS
-$cfg->setConfigEntry('task_crawler_remote_url_fetcher_max_runs', 0);
+// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_remote_url_getter_max_runs', 0);
+
+// CFG: CRAWLER-REMOTE-JOB-PUBLISHER-TASK-CLASS
+$cfg->setConfigEntry('crawler_remote_job_publisher_task_class', 'CrawlerRemoteJobPublisherTask');
+
+// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_remote_job_publisher_startup_delay', 2000);
+
+// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_remote_job_publisher_interval_delay', 50);
+
+// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_remote_job_publisher_max_runs', 0);
 
 ///////////////////////////////////////////////////////////////////////////////
 //                            HTTP Configuration
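diff --git a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
index 799f9b60a9eecdb51fbf610c5de824642e573af4..43a7be491f7b6c994a411a0216c65e3f296d5cfd 100644 (file)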
@@ -52,7 +52,7 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
         * @param       $responseInstance       An instance of a class with an Responseable interface
         * @return      void
         * @throws      FilterChainException    If the filter chain needs to be interrupted
-        * @todo        5% done
+        * @todo        10% done
         */
        public function execute (Requestable $requestInstance, Responseable $responseInstance) {
                // Get crawler instance
@@ -69,13 +69,17 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class');
                $handlerInstance->registerTask('crawler_node_communicator', $taskInstance);
 
-               // 2) Local URL fetcher (fetches URLs locally and adds them to the analyzer's input stack)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_fetcher_task_class');
-               $handlerInstance->registerTask('crawler_local_url_fetcher', $taskInstance);
+               // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class');
+               $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance);
 
-               // 3) Remote URL fetcher (let fetch URLs by other crawler nodes and also adds them to the stack)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_fetcher_task_class');
-               $handlerInstance->registerTask('crawler_remote_url_fetcher', $taskInstance);
+               // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in the local index)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class');
+               $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance);
+
+               // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
+               $handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance);
 
                // Put the task handler in registry
                Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
diff --git a/application/hub/main/tasks/crawler/publisher/.htaccess b/application/hub/main/tasks/crawler/publisher/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php b/application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php
new file mode 100644 (file)
index 0000000..b36c9d7
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A RemoteJobPublisher task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerRemoteJobPublisherTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $taskInstance   An instance of a Visitable class
+        */
+       public final static function createCrawlerRemoteJobPublisherTask () {
+               // Get new instance
+               $taskInstance = new CrawlerRemoteJobPublisherTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_fetcher/.htaccess b/application/hub/main/tasks/crawler/url_fetcher/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/crawler/url_fetcher/local/.htaccess b/application/hub/main/tasks/crawler/url_fetcher/local/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/crawler/url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php b/application/hub/main/tasks/crawler/url_fetcher/local/class_CrawlerLocalUrlFetcherTask.php
deleted file mode 100644 (file)
index 2f4b43f..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-<?php
-/**
- * A LocalUrlFetcher task for crawlers
- *
- * @author             Roland Haeder <webmaster@ship-simu.org>
- * @version            0.0.0
- * @copyright  Copyright (c) 2014 Crawler Developer Team
- * @license            GNU GPL 3.0 or any newer version
- * @link               http://www.ship-simu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-class CrawlerLocalUrlFetcherTask extends BaseTask implements Taskable, Visitable {
-       /**
-        * Protected constructor
-        *
-        * @return      void
-        */
-       protected function __construct () {
-               // Call parent constructor
-               parent::__construct(__CLASS__);
-       }
-
-       /**
-        * Creates an instance of this class
-        *
-        * @return      $taskInstance   An instance of a Visitable class
-        */
-       public final static function createCrawlerLocalUrlFetcherTask () {
-               // Get new instance
-               $taskInstance = new CrawlerLocalUrlFetcherTask();
-
-               // Return the prepared instance
-               return $taskInstance;
-       }
-
-       /**
-        * Accepts the visitor to process the visitor
-        *
-        * @param       $visitorInstance        An instance of a Visitor class
-        * @return      void
-        * @todo        Maybe visit some sub-objects
-        */
-       public function accept (Visitor $visitorInstance) {
-               // Visit this task
-               $visitorInstance->visitTask($this);
-       }
-
-       /**
-        * Executes the task
-        *
-        * @return      void
-        * @todo        0%
-        */
-       public function executeTask () {
-               $this->partialStub('Unimplemented task.');
-       }
-}
-
-// [EOF]
-?>
diff --git a/application/hub/main/tasks/crawler/url_fetcher/remote/.htaccess b/application/hub/main/tasks/crawler/url_fetcher/remote/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/crawler/url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php b/application/hub/main/tasks/crawler/url_fetcher/remote/class_CrawlerRemoteUrlFetcherTask.php
deleted file mode 100644 (file)
index 57d3009..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-<?php
-/**
- * A RemoteUrlFetcher task for crawlers
- *
- * @author             Roland Haeder <webmaster@ship-simu.org>
- * @version            0.0.0
- * @copyright  Copyright (c) 2014 Crawler Developer Team
- * @license            GNU GPL 3.0 or any newer version
- * @link               http://www.ship-simu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-class CrawlerRemoteUrlFetcherTask extends BaseTask implements Taskable, Visitable {
-       /**
-        * Protected constructor
-        *
-        * @return      void
-        */
-       protected function __construct () {
-               // Call parent constructor
-               parent::__construct(__CLASS__);
-       }
-
-       /**
-        * Creates an instance of this class
-        *
-        * @return      $taskInstance   An instance of a Visitable class
-        */
-       public final static function createCrawlerRemoteUrlFetcherTask () {
-               // Get new instance
-               $taskInstance = new CrawlerRemoteUrlFetcherTask();
-
-               // Return the prepared instance
-               return $taskInstance;
-       }
-
-       /**
-        * Accepts the visitor to process the visitor
-        *
-        * @param       $visitorInstance        An instance of a Visitor class
-        * @return      void
-        * @todo        Maybe visit some sub-objects
-        */
-       public function accept (Visitor $visitorInstance) {
-               // Visit this task
-               $visitorInstance->visitTask($this);
-       }
-
-       /**
-        * Executes the task
-        *
-        * @return      void
-        * @todo        0%
-        */
-       public function executeTask () {
-               $this->partialStub('Unimplemented task.');
-       }
-}
-
-// [EOF]
-?>
diff --git a/application/hub/main/tasks/crawler/url_getter/.htaccess b/application/hub/main/tasks/crawler/url_getter/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/url_getter/local/.htaccess b/application/hub/main/tasks/crawler/url_getter/local/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php b/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php
new file mode 100644 (file)
index 0000000..e3ecc6c
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A LocalUrlGetter task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerLocalUrlGetterTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $taskInstance   An instance of a Visitable class
+        */
+       public final static function createCrawlerLocalUrlGetterTask () {
+               // Get new instance
+               $taskInstance = new CrawlerLocalUrlGetterTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_getter/remote/.htaccess b/application/hub/main/tasks/crawler/url_getter/remote/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php b/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php
new file mode 100644 (file)
index 0000000..9dc5b9d
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A RemoteUrlGetter task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerRemoteUrlGetterTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $taskInstance   An instance of a Visitable class
+        */
+       public final static function createCrawlerRemoteUrlGetterTask () {
+               // Get new instance
+               $taskInstance = new CrawlerRemoteUrlGetterTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>