git.mxchange.org Git - hub.git/commitdiff
Continued:
author Roland Haeder <roland@mxchange.org>
Wed, 7 May 2014 20:18:54 +0000 (22:18 +0200)
committer Roland Haeder <roland@mxchange.org>
Wed, 7 May 2014 20:18:54 +0000 (22:18 +0200)
- added new tasks for URL sources (I need to split it into separate tasks to maintain a good cycle time).
- removed deprecated files and directories
- added "demo" list for URL list source

Signed-off-by: Roland Haeder <roland@mxchange.org>
17 files changed:
application/hub/config.php
application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
application/hub/main/tasks/crawler/url_source/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource [new file with mode: 0644]
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php [new file with mode: 0644]
application/hub/main/tasks/hub/.htaccess [deleted file]
application/hub/main/tasks/hub/announcement/.htaccess [deleted file]
application/hub/main/tasks/hub/chunks/.htaccess [deleted file]
application/hub/main/tasks/hub/class_Hub [deleted file]
application/hub/main/tasks/hub/decoder/.htaccess [deleted file]
application/hub/main/tasks/hub/ping/.htaccess [deleted file]
application/hub/main/tasks/hub/update/.htaccess [deleted file]
url_lists/.htaccess [new file with mode: 0644]
url_lists/demo.lst [new file with mode: 0644]

index 478c03cdb8d0442d0e3ad230207b02b1d864d621..ccd06b2e417f26daffbad6f84a2f3a7430935585 100644 (file)
@@ -1325,6 +1325,54 @@ $cfg->setConfigEntry('task_crawler_ping_interval_delay', 250);
 // CFG: TASK-CRAWLER-PING-MAX-RUNS
 $cfg->setConfigEntry('task_crawler_ping_max_runs', 0);
 
+// CFG: CRAWLER-URL-SOURCE-LOCAL-START-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_local_start_task_class', 'CrawlerUrlSourceLocalStartTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-LOCAL-START-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_local_start_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-LOCAL-START-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_local_start_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-LOCAL-START-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_local_start_max_runs', 0);
+
+// CFG: CRAWLER-URL-SOURCE-UPLOADED-LIST-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_uploaded_list_task_class', 'CrawlerUrlSourceUploadedListTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-UPLOADED-LIST-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_uploaded_list_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-UPLOADED-LIST-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_uploaded_list_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-UPLOADED-LIST-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_uploaded_list_max_runs', 0);
+
+// CFG: CRAWLER-URL-SOURCE-RSS-START-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_rss_start_task_class', 'CrawlerUrlSourceRssStartTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-RSS-START-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_rss_start_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-RSS-START-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_rss_start_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-RSS-START-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_rss_start_max_runs', 0);
+
+// CFG: CRAWLER-URL-SOURCE-FOUND-RSS-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_found_rss_task_class', 'CrawlerUrlSourceFoundRssTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_found_rss_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_found_rss_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_found_rss_max_runs', 0);
+
 ///////////////////////////////////////////////////////////////////////////////
 //                            HTTP Configuration
 ///////////////////////////////////////////////////////////////////////////////
index cb2a1383c6e7b86cfea2e35ddf8d132a571fe1a7..4ba0ed0b178ef9b93da8490fe0038aa423281d76 100644 (file)
@@ -101,6 +101,22 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
                $handlerInstance->registerTask('crawler_ping', $taskInstance);
 
+               // 10) URL source: local start
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_local_start_task_class');
+               $handlerInstance->registerTask('crawler_url_source_local_start', $taskInstance);
+
+               // 11) URL source: uploaded list
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_uploaded_list_task_class');
+               $handlerInstance->registerTask('crawler_url_source_uploaded_list', $taskInstance);
+
+               // 12) URL source: RSS feed
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_rss_start_task_class');
+               $handlerInstance->registerTask('crawler_url_source_rss_start', $taskInstance);
+
+               // 13) URL source: found RSS/ATOM feed
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class');
+               $handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance);
+
                // Put the task handler in registry
                Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
        }
diff --git a/application/hub/main/tasks/crawler/url_source/.htaccess b/application/hub/main/tasks/crawler/url_source/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource
new file mode 100644 (file)
index 0000000..fc9fa9f
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A URL source task for ??? for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSource???Task extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor, use createCrawlerUrlSource???Task() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor with this class' name
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates an instance of this task class
+        *
+        * @return      $taskInstance   An instance of this class (Taskable and Visitable)
+        */
+       public final static function createCrawlerUrlSource???Task () {
+               // Get new instance (must match the class name declared above)
+               $taskInstance = new CrawlerUrlSource???Task();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Let the visitor visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task (currently only a stub calling partialStub())
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php
new file mode 100644 (file)
index 0000000..1988739
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A crawler task for the URL source "found RSS/ATOM feed" (stub, not yet implemented)
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceFoundRssTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor, use createCrawlerUrlSourceFoundRssTask() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor with this class' name
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates an instance of this task class
+        *
+        * @return      $taskInstance   An instance of this class (Taskable and Visitable)
+        */
+       public final static function createCrawlerUrlSourceFoundRssTask () {
+               // Get new instance
+               $taskInstance = new CrawlerUrlSourceFoundRssTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Let the visitor visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task (currently only a stub calling partialStub())
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php
new file mode 100644 (file)
index 0000000..0972f06
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A crawler task for the URL source "local start" (stub, not yet implemented)
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceLocalStartTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor, use createCrawlerUrlSourceLocalStartTask() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor with this class' name
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates an instance of this task class
+        *
+        * @return      $taskInstance   An instance of this class (Taskable and Visitable)
+        */
+       public final static function createCrawlerUrlSourceLocalStartTask () {
+               // Get new instance
+               $taskInstance = new CrawlerUrlSourceLocalStartTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Let the visitor visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task (currently only a stub calling partialStub())
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php
new file mode 100644 (file)
index 0000000..fd1a77d
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A crawler task for the URL source "RSS feed" start (stub, not yet implemented)
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceRssStartTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor, use createCrawlerUrlSourceRssStartTask() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor with this class' name
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates an instance of this task class
+        *
+        * @return      $taskInstance   An instance of this class (Taskable and Visitable)
+        */
+       public final static function createCrawlerUrlSourceRssStartTask () {
+               // Get new instance
+               $taskInstance = new CrawlerUrlSourceRssStartTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Let the visitor visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task (currently only a stub calling partialStub())
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php
new file mode 100644 (file)
index 0000000..c8b4b70
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A crawler task for the URL source "uploaded list" (stub, not yet implemented)
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceUploadedListTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor, use createCrawlerUrlSourceUploadedListTask() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor with this class' name
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates an instance of this task class
+        *
+        * @return      $taskInstance   An instance of this class (Taskable and Visitable)
+        */
+       public final static function createCrawlerUrlSourceUploadedListTask () {
+               // Get new instance
+               $taskInstance = new CrawlerUrlSourceUploadedListTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Let the visitor visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task (currently only a stub calling partialStub())
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/hub/.htaccess b/application/hub/main/tasks/hub/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/hub/announcement/.htaccess b/application/hub/main/tasks/hub/announcement/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/hub/chunks/.htaccess b/application/hub/main/tasks/hub/chunks/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/hub/class_Hub b/application/hub/main/tasks/hub/class_Hub
deleted file mode 100644 (file)
index f551ef4..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-<?php
-// @DEPRECATED
-?>
diff --git a/application/hub/main/tasks/hub/decoder/.htaccess b/application/hub/main/tasks/hub/decoder/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/hub/ping/.htaccess b/application/hub/main/tasks/hub/ping/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/hub/update/.htaccess b/application/hub/main/tasks/hub/update/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/url_lists/.htaccess b/url_lists/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/url_lists/demo.lst b/url_lists/demo.lst
new file mode 100644 (file)
index 0000000..84b5b40
--- /dev/null
@@ -0,0 +1 @@
+http://mxchange.org