- added new tasks for URL sources (I need to split them into separate tasks to maintain a good cycle time).
- removed deprecated files and directories
- added "demo" list for URL list source
Signed-off-by: Roland Haeder <roland@mxchange.org>
// CFG: TASK-CRAWLER-PING-MAX-RUNS
$cfg->setConfigEntry('task_crawler_ping_max_runs', 0);
+// CFG: CRAWLER-URL-SOURCE-LOCAL-START-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_local_start_task_class', 'CrawlerUrlSourceLocalStartTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-LOCAL-START-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_local_start_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-LOCAL-START-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_local_start_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-LOCAL-START-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_local_start_max_runs', 0);
+
+// CFG: CRAWLER-URL-SOURCE-UPLOADED-LIST-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_uploaded_list_task_class', 'CrawlerUrlSourceUploadedListTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-UPLOADED-LIST-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_uploaded_list_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-UPLOADED-LIST-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_uploaded_list_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-UPLOADED-LIST-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_uploaded_list_max_runs', 0);
+
+// CFG: CRAWLER-URL-SOURCE-RSS-START-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_rss_start_task_class', 'CrawlerUrlSourceRssStartTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-RSS-START-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_rss_start_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-RSS-START-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_rss_start_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-RSS-START-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_rss_start_max_runs', 0);
+
+// CFG: CRAWLER-URL-SOURCE-FOUND-RSS-TASK-CLASS
+$cfg->setConfigEntry('crawler_url_source_found_rss_task_class', 'CrawlerUrlSourceFoundRssTask');
+
+// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_found_rss_startup_delay', 3000);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_url_source_found_rss_interval_delay', 150);
+
+// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_url_source_found_rss_max_runs', 0);
+
///////////////////////////////////////////////////////////////////////////////
// HTTP Configuration
///////////////////////////////////////////////////////////////////////////////
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
$handlerInstance->registerTask('crawler_ping', $taskInstance);
+ // 10) URL source: local start
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_local_start_task_class');
+ $handlerInstance->registerTask('crawler_url_source_local_start', $taskInstance);
+
+ // 11) URL source: uploaded list
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_uploaded_list_task_class');
+ $handlerInstance->registerTask('crawler_url_source_uploaded_list', $taskInstance);
+
+ // 12) URL source: RSS feed
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_rss_start_task_class');
+ $handlerInstance->registerTask('crawler_url_source_rss_start', $taskInstance);
+
+ // 13) URL source: found RSS/ATOM feed
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class');
+ $handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance);
+
// Put the task handler in registry
Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
}
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A URL source task for ??? for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSource???Task extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor, outside code obtains instances through
+	 * createCrawlerUrlSource???Task().
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Call parent constructor with this class' name
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Creates an instance of this class. Both the factory name and the
+	 * instantiated class carry the full "CrawlerUrlSource" prefix so that
+	 * they match the class declared in this template once the placeholder
+	 * has been replaced.
+	 *
+	 * @return	$taskInstance	A new instance of this task class
+	 */
+	public final static function createCrawlerUrlSource???Task () {
+		// Get new instance
+		$taskInstance = new CrawlerUrlSource???Task();
+
+		// Return the prepared instance
+		return $taskInstance;
+	}
+
+	/**
+	 * Accepts the visitor by handing this task to its visitTask() method
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Visit this task
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task. Not implemented yet: only partialStub() is
+	 * called with a notice.
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+<?php
+/**
+ * A URL source task for FoundRss for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceFoundRssTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Constructor, kept protected: outside code obtains instances through
+	 * createCrawlerUrlSourceFoundRssTask().
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand this class' name over to the parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method for this task
+	 *
+	 * @return	$taskInstance	A new instance of this class
+	 */
+	public final static function createCrawlerUrlSourceFoundRssTask () {
+		// Create the task and hand it straight back to the caller
+		return new CrawlerUrlSourceFoundRssTask();
+	}
+
+	/**
+	 * Lets the given visitor visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Pass this task on to the visitor
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Runs the task. Not implemented yet: only partialStub() is called
+	 * with a notice.
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+<?php
+/**
+ * A URL source task for LocalStart for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceLocalStartTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Constructor, kept protected: outside code obtains instances through
+	 * createCrawlerUrlSourceLocalStartTask().
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand this class' name over to the parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method for this task
+	 *
+	 * @return	$taskInstance	A new instance of this class
+	 */
+	public final static function createCrawlerUrlSourceLocalStartTask () {
+		// Create the task and hand it straight back to the caller
+		return new CrawlerUrlSourceLocalStartTask();
+	}
+
+	/**
+	 * Lets the given visitor visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Pass this task on to the visitor
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Runs the task. Not implemented yet: only partialStub() is called
+	 * with a notice.
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+<?php
+/**
+ * A URL source task for RssStart for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceRssStartTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Constructor, kept protected: outside code obtains instances through
+	 * createCrawlerUrlSourceRssStartTask().
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand this class' name over to the parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method for this task
+	 *
+	 * @return	$taskInstance	A new instance of this class
+	 */
+	public final static function createCrawlerUrlSourceRssStartTask () {
+		// Create the task and hand it straight back to the caller
+		return new CrawlerUrlSourceRssStartTask();
+	}
+
+	/**
+	 * Lets the given visitor visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Pass this task on to the visitor
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Runs the task. Not implemented yet: only partialStub() is called
+	 * with a notice.
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+<?php
+/**
+ * A URL source task for UploadedList for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUrlSourceUploadedListTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Constructor, kept protected: outside code obtains instances through
+	 * createCrawlerUrlSourceUploadedListTask().
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand this class' name over to the parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method for this task
+	 *
+	 * @return	$taskInstance	A new instance of this class
+	 */
+	public final static function createCrawlerUrlSourceUploadedListTask () {
+		// Create the task and hand it straight back to the caller
+		return new CrawlerUrlSourceUploadedListTask();
+	}
+
+	/**
+	 * Lets the given visitor visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Pass this task on to the visitor
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Runs the task. Not implemented yet: only partialStub() is called
+	 * with a notice.
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
+++ /dev/null
-Deny from all
+++ /dev/null
-Deny from all
+++ /dev/null
-Deny from all
+++ /dev/null
-<?php
-// @DEPRECATED
-?>
+++ /dev/null
-Deny from all
+++ /dev/null
-Deny from all
+++ /dev/null
-Deny from all
--- /dev/null
+Deny from all
--- /dev/null
+http://mxchange.org