From 20bcb77223bd276e1f16d2c762893791feb6c28e Mon Sep 17 00:00:00 2001
From: Roland Haeder <roland@mxchange.org>
Date: Wed, 7 May 2014 22:18:54 +0200
Subject: [PATCH] Continued: - added new tasks for URL sources (I need to split
 it in separate tasks to maintain a good cycle time). - removed deprecated
 files and directories - added "demo" list for URL list source

Signed-off-by: Roland Haeder <roland@mxchange.org>
---
 application/hub/config.php                    | 48 +++++++++++++
 ...ss_CrawlerTaskHandlerInitializerFilter.php | 16 +++++
 .../{hub => crawler/url_source}/.htaccess     |  0
 .../crawler/url_source/class_CrawlerUrlSource | 72 +++++++++++++++++++
 .../class_CrawlerUrlSourceFoundRssTask.php    | 72 +++++++++++++++++++
 .../class_CrawlerUrlSourceLocalStartTask.php  | 72 +++++++++++++++++++
 .../class_CrawlerUrlSourceRssStartTask.php    | 72 +++++++++++++++++++
 ...class_CrawlerUrlSourceUploadedListTask.php | 72 +++++++++++++++++++
 .../hub/main/tasks/hub/chunks/.htaccess       |  1 -
 application/hub/main/tasks/hub/class_Hub      |  3 -
 .../hub/main/tasks/hub/decoder/.htaccess      |  1 -
 application/hub/main/tasks/hub/ping/.htaccess |  1 -
 .../hub/main/tasks/hub/update/.htaccess       |  1 -
 .../hub/announcement => url_lists}/.htaccess  |  0
 url_lists/demo.lst                            |  1 +
 15 files changed, 425 insertions(+), 7 deletions(-)
 rename application/hub/main/tasks/{hub => crawler/url_source}/.htaccess (100%)
 create mode 100644 application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource
 create mode 100644 application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php
 create mode 100644 application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php
 create mode 100644 application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php
 create mode 100644 application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php
 delete mode 100644 application/hub/main/tasks/hub/chunks/.htaccess
 delete mode 100644 application/hub/main/tasks/hub/class_Hub
 delete mode 100644 application/hub/main/tasks/hub/decoder/.htaccess
 delete mode 100644 application/hub/main/tasks/hub/ping/.htaccess
 delete mode 100644 application/hub/main/tasks/hub/update/.htaccess
 rename {application/hub/main/tasks/hub/announcement => url_lists}/.htaccess (100%)
 create mode 100644 url_lists/demo.lst

diff --git a/application/hub/config.php b/application/hub/config.php
index 478c03cdb..ccd06b2e4 100644
--- a/application/hub/config.php
+++ b/application/hub/config.php
@@ -1325,6 +1325,54 @@ $cfg->setConfigEntry('task_crawler_ping_interval_delay', 250);
 // CFG: TASK-CRAWLER-PING-MAX-RUNS
 $cfg->setConfigEntry('task_crawler_ping_max_runs', 0);
 
+// CFG: CRAWLER-URL-SOURCE-LOCAL-START-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_local_start_task_class', 'CrawlerUrlSourceLocalStartTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-LOCAL-START-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_local_start_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-LOCAL-START-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_local_start_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-LOCAL-START-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_local_start_max_runs', 0);
+
+// CFG: CRAWLER-URL-SOURCE-UPLOADED-LIST-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_uploaded_list_task_class', 'CrawlerUrlSourceUploadedListTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-UPLOADED-LIST-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_uploaded_list_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-UPLOADED-LIST-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_uploaded_list_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-UPLOADED-LIST-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_uploaded_list_max_runs', 0);
+
+// CFG: CRAWLER-URL-SOURCE-RSS-START-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_rss_start_task_class', 'CrawlerUrlSourceRssStartTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-RSS-START-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_rss_start_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-RSS-START-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_rss_start_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-RSS-START-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_rss_start_max_runs', 0);
+
+// CFG: CRAWLER-URL-SOURCE-FOUND-RSS-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_found_rss_task_class', 'CrawlerUrlSourceFoundRssTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_found_rss_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_found_rss_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_found_rss_max_runs', 0);
+
 ///////////////////////////////////////////////////////////////////////////////
 //                            HTTP Configuration
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
index cb2a1383c..4ba0ed0b1 100644
--- a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
+++ b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
@@ -101,6 +101,22 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
 		$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
 		$handlerInstance->registerTask('crawler_ping', $taskInstance);
 
+		// 10) URL source: local start
+		$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_local_start_task_class');
+		$handlerInstance->registerTask('crawler_url_source_local_start', $taskInstance);
+
+		// 11) URL source: uploaded list
+		$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_uploaded_list_task_class');
+		$handlerInstance->registerTask('crawler_url_source_uploaded_list', $taskInstance);
+
+		// 12) URL source: RSS feed
+		$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_rss_start_task_class');
+		$handlerInstance->registerTask('crawler_url_source_rss_start', $taskInstance);
+
+		// 13) URL source: found RSS/ATOM feed
+		$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class');
+		$handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance);
+
 		// Put the task handler in registry
 		Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
 	}
diff --git a/application/hub/main/tasks/hub/.htaccess b/application/hub/main/tasks/crawler/url_source/.htaccess
similarity index 100%
rename from application/hub/main/tasks/hub/.htaccess
rename to application/hub/main/tasks/crawler/url_source/.htaccess
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource
new file mode 100644
index 000000000..fc9fa9f0e
--- /dev/null
+++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A URL source task for ??? for crawlers
+ *
+ * @author		Roland Haeder <webmaster@ship-simu.org>
+ * @version		0.0.0
+ * @copyright	Copyright (c) 2014 Crawler Developer Team
+ * @license		GNU GPL 3.0 or any newer version
+ * @link		http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSource???Task extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor, hands this class' name to the parent constructor
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Call parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Creates a new instance of this task class
+	 *
+	 * @return	$taskInstance	A new instance of this class (implements Taskable and Visitable)
+	 */
+	public final static function createCrawlerUrlSource???Task () {
+		// Get new instance (name must match the class name declared above)
+		$taskInstance = new CrawlerUrlSource???Task();
+
+		// Return the prepared instance
+		return $taskInstance;
+	}
+
+	/**
+	 * Accepts the given visitor and lets it visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Visit this task
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task, currently only a stub that calls partialStub()
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php
new file mode 100644
index 000000000..1988739de
--- /dev/null
+++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A URL source task (found RSS/ATOM feed) for crawlers
+ *
+ * @author		Roland Haeder <webmaster@ship-simu.org>
+ * @version		0.0.0
+ * @copyright	Copyright (c) 2014 Crawler Developer Team
+ * @license		GNU GPL 3.0 or any newer version
+ * @link		http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceFoundRssTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor, hands this class' name to the parent constructor
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Call parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Creates a new instance of this task class
+	 *
+	 * @return	$taskInstance	A new instance of this class (implements Taskable and Visitable)
+	 */
+	public final static function createCrawlerUrlSourceFoundRssTask () {
+		// Get new instance
+		$taskInstance = new CrawlerUrlSourceFoundRssTask();
+
+		// Return the prepared instance
+		return $taskInstance;
+	}
+
+	/**
+	 * Accepts the given visitor and lets it visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Visit this task
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task, currently only a stub that calls partialStub()
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php
new file mode 100644
index 000000000..0972f062a
--- /dev/null
+++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A URL source task (local start) for crawlers
+ *
+ * @author		Roland Haeder <webmaster@ship-simu.org>
+ * @version		0.0.0
+ * @copyright	Copyright (c) 2014 Crawler Developer Team
+ * @license		GNU GPL 3.0 or any newer version
+ * @link		http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceLocalStartTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor, hands this class' name to the parent constructor
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Call parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Creates a new instance of this task class
+	 *
+	 * @return	$taskInstance	A new instance of this class (implements Taskable and Visitable)
+	 */
+	public final static function createCrawlerUrlSourceLocalStartTask () {
+		// Get new instance
+		$taskInstance = new CrawlerUrlSourceLocalStartTask();
+
+		// Return the prepared instance
+		return $taskInstance;
+	}
+
+	/**
+	 * Accepts the given visitor and lets it visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Visit this task
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task, currently only a stub that calls partialStub()
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php
new file mode 100644
index 000000000..fd1a77d45
--- /dev/null
+++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A URL source task (RSS feed start) for crawlers
+ *
+ * @author		Roland Haeder <webmaster@ship-simu.org>
+ * @version		0.0.0
+ * @copyright	Copyright (c) 2014 Crawler Developer Team
+ * @license		GNU GPL 3.0 or any newer version
+ * @link		http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceRssStartTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor, hands this class' name to the parent constructor
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Call parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Creates a new instance of this task class
+	 *
+	 * @return	$taskInstance	A new instance of this class (implements Taskable and Visitable)
+	 */
+	public final static function createCrawlerUrlSourceRssStartTask () {
+		// Get new instance
+		$taskInstance = new CrawlerUrlSourceRssStartTask();
+
+		// Return the prepared instance
+		return $taskInstance;
+	}
+
+	/**
+	 * Accepts the given visitor and lets it visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Visit this task
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task, currently only a stub that calls partialStub()
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php
new file mode 100644
index 000000000..c8b4b7085
--- /dev/null
+++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A URL source task (uploaded list) for crawlers
+ *
+ * @author		Roland Haeder <webmaster@ship-simu.org>
+ * @version		0.0.0
+ * @copyright	Copyright (c) 2014 Crawler Developer Team
+ * @license		GNU GPL 3.0 or any newer version
+ * @link		http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceUploadedListTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor, hands this class' name to the parent constructor
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Call parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Creates a new instance of this task class
+	 *
+	 * @return	$taskInstance	A new instance of this class (implements Taskable and Visitable)
+	 */
+	public final static function createCrawlerUrlSourceUploadedListTask () {
+		// Get new instance
+		$taskInstance = new CrawlerUrlSourceUploadedListTask();
+
+		// Return the prepared instance
+		return $taskInstance;
+	}
+
+	/**
+	 * Accepts the given visitor and lets it visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Visit this task
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task, currently only a stub that calls partialStub()
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/hub/chunks/.htaccess b/application/hub/main/tasks/hub/chunks/.htaccess
deleted file mode 100644
index 3a4288278..000000000
--- a/application/hub/main/tasks/hub/chunks/.htaccess
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/hub/class_Hub b/application/hub/main/tasks/hub/class_Hub
deleted file mode 100644
index f551ef47b..000000000
--- a/application/hub/main/tasks/hub/class_Hub
+++ /dev/null
@@ -1,3 +0,0 @@
-<?php
-// @DEPRECATED
-?>
diff --git a/application/hub/main/tasks/hub/decoder/.htaccess b/application/hub/main/tasks/hub/decoder/.htaccess
deleted file mode 100644
index 3a4288278..000000000
--- a/application/hub/main/tasks/hub/decoder/.htaccess
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/hub/ping/.htaccess b/application/hub/main/tasks/hub/ping/.htaccess
deleted file mode 100644
index 3a4288278..000000000
--- a/application/hub/main/tasks/hub/ping/.htaccess
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/hub/update/.htaccess b/application/hub/main/tasks/hub/update/.htaccess
deleted file mode 100644
index 3a4288278..000000000
--- a/application/hub/main/tasks/hub/update/.htaccess
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/hub/announcement/.htaccess b/url_lists/.htaccess
similarity index 100%
rename from application/hub/main/tasks/hub/announcement/.htaccess
rename to url_lists/.htaccess
diff --git a/url_lists/demo.lst b/url_lists/demo.lst
new file mode 100644
index 000000000..84b5b400b
--- /dev/null
+++ b/url_lists/demo.lst
@@ -0,0 +1 @@
+http://mxchange.org
-- 
2.39.5