From 9df6aa6f3dd7190e379ce16008de92402a17e19a Mon Sep 17 00:00:00 2001 From: Roland Haeder Date: Thu, 15 May 2014 21:45:28 +0200 Subject: [PATCH] Continued with crawler: - added BaseUrlSource class with generic file-based stack initialization. - added missing configuration entries. Signed-off-by: Roland Haeder --- application/hub/config.php | 18 ++++++ .../hub/main/source/class_BaseUrlSource.php | 60 +++++++++++++++++++ .../main/source/urls/class_CrawlerUrlSource | 5 +- .../urls/class_CrawlerUrlSourceFoundRss.php | 5 +- .../urls/class_CrawlerUrlSourceLocalStart.php | 5 +- .../urls/class_CrawlerUrlSourceRssStart.php | 5 +- .../class_CrawlerUrlSourceUploadedList.php | 5 +- 7 files changed, 98 insertions(+), 5 deletions(-) create mode 100644 application/hub/main/source/class_BaseUrlSource.php diff --git a/application/hub/config.php b/application/hub/config.php index 8536140c1..6d70ce50c 100644 --- a/application/hub/config.php +++ b/application/hub/config.php @@ -1193,6 +1193,12 @@ $cfg->setConfigEntry('task_chat_telnet_listener_max_runs', 0); // Crawler Configuration /////////////////////////////////////////////////////////////////////////////// +// CFG: BASE-FILE-STACKS-PATH +$cfg->setConfigEntry('base_file_stacks_path', 'data/stacks'); + +// CFG: FILE-STACKS-EXTENSION +$cfg->setConfigEntry('file_stacks_extension', 'stack'); + // CFG: HUBCONSOLE-CMD-CHAT-RESOLVER-CLASS $cfg->setConfigEntry('hubconsole_cmd_crawler_resolver_class', 'HubConsoleCommandResolver'); @@ -1250,6 +1256,18 @@ $cfg->setConfigEntry('crawler_url_source_found_rss_class', 'CrawlerUrlSourceFoun // CFG: CRAWLER-NODE-COMMUNICATOR-TASK-CLASS $cfg->setConfigEntry('crawler_node_communicator_task_class', 'CrawlerNodeCommunicatorTask'); +// CFG: CRAWLER-URL-LOCAL-START-STACK-CLASS +$cfg->setConfigEntry('crawler_url_local_start_stack_class', 'FiFoFileStack'); + +// CFG: CRAWLER-URL-UPLOADED-LIST-STACK-CLASS +$cfg->setConfigEntry('crawler_url_uploaded_list_stack_class', 'FiFoFileStack'); + +// CFG: 
CRAWLER-URL-RSS-START-STACK-CLASS +$cfg->setConfigEntry('crawler_url_rss_start_stack_class', 'FiFoFileStack'); + +// CFG: CRAWLER-URL-FOUND-RSS-STACK-CLASS +$cfg->setConfigEntry('crawler_url_found_rss_stack_class', 'FiFoFileStack'); + // CFG: TASK-CRAWLER-NODE-COMMUNICATOR-STARTUP-DELAY $cfg->setConfigEntry('task_crawler_node_communicator_startup_delay', 500); diff --git a/application/hub/main/source/class_BaseUrlSource.php b/application/hub/main/source/class_BaseUrlSource.php new file mode 100644 index 000000000..391346d32 --- /dev/null +++ b/application/hub/main/source/class_BaseUrlSource.php @@ -0,0 +1,60 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +class BaseUrlSource extends BaseSource { + /** + * Protected constructor + * + * @param $className Name of the class + * @return void + */ + protected function __construct ($className) { + // Call parent constructor + parent::__construct($className); + } + + /** + * Initializes this source + * + * @param $prefix Prefix for this source + * @param $sourceName Name of this source + * @return void + */ + protected function initSource ($prefix, $sourceName) { + // Construct file stack name + $stackFileName = sprintf('%s/%s.%s', + $this->getConfigInstance()->getConfigEntry('base_file_stacks_path'), + $sourceName, + $this->getConfigInstance()->getConfigEntry('file_stacks_extension') + ); + + // Get file-based stack + $stackInstance = ObjectFactory::createObjectByConfiguredName($prefix . '_url_' . $sourceName . '_stack_class', array($stackFileName)); + + // Set stack here + $this->setStackerInstance($stackInstance); + } +} + +// [EOF] +?> diff --git a/application/hub/main/source/urls/class_CrawlerUrlSource b/application/hub/main/source/urls/class_CrawlerUrlSource index c330af0a4..1fef9c7c9 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSource +++ b/application/hub/main/source/urls/class_CrawlerUrlSource @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -class CrawlerUrlSource??? extends BaseSource implements UrlSource, Registerable { +class CrawlerUrlSource??? extends BaseUrlSource implements UrlSource, Registerable { /** * Protected constructor * @@ -41,6 +41,9 @@ class CrawlerUrlSource???
extends BaseSource implements UrlSource, Registerable // Get new instance $sourceInstance = new CrawlerUrlSource???(); + // Init source + $sourceInstance->initSource('crawler', '!!!'); + // Return the prepared instance return $sourceInstance; } diff --git a/application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php b/application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php index 08a4595f6..9240c8456 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php +++ b/application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -class CrawlerUrlSourceFoundRss extends BaseSource implements UrlSource, Registerable { +class CrawlerUrlSourceFoundRss extends BaseUrlSource implements UrlSource, Registerable { /** * Protected constructor * @@ -41,6 +41,9 @@ class CrawlerUrlSourceFoundRss extends BaseSource implements UrlSource, Register // Get new instance $sourceInstance = new CrawlerUrlSourceFoundRss(); + // Init source + $sourceInstance->initSource('crawler', 'found_rss'); + // Return the prepared instance return $sourceInstance; } diff --git a/application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php b/application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php index 67279be04..1ccc9efa9 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php +++ b/application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -class CrawlerUrlSourceLocalStart extends BaseSource implements UrlSource, Registerable { +class CrawlerUrlSourceLocalStart extends BaseUrlSource implements UrlSource, Registerable { /** * Protected constructor * @@ -41,6 +41,9 @@ class CrawlerUrlSourceLocalStart extends BaseSource implements UrlSource, Regist // Get new instance $sourceInstance = new CrawlerUrlSourceLocalStart(); + // Init source + $sourceInstance->initSource('crawler', 'local_start'); + // Return the prepared instance return $sourceInstance; } diff --git a/application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php b/application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php index 7378c0c0b..12177e7ad 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php +++ b/application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -class CrawlerUrlSourceRssStart extends BaseSource implements UrlSource, Registerable { +class CrawlerUrlSourceRssStart extends BaseUrlSource implements UrlSource, Registerable { /** * Protected constructor * @@ -41,6 +41,9 @@ class CrawlerUrlSourceRssStart extends BaseSource implements UrlSource, Register // Get new instance $sourceInstance = new CrawlerUrlSourceRssStart(); + // Init source + $sourceInstance->initSource('crawler', 'rss_start'); + // Return the prepared instance return $sourceInstance; } diff --git a/application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php b/application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php index 5ef26cdaf..96d1ba2d6 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php +++ b/application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ -class CrawlerUrlSourceUploadedList extends BaseSource implements UrlSource, Registerable { +class CrawlerUrlSourceUploadedList extends BaseUrlSource implements UrlSource, Registerable { /** * Protected constructor * @@ -41,6 +41,9 @@ class CrawlerUrlSourceUploadedList extends BaseSource implements UrlSource, Regi // Get new instance $sourceInstance = new CrawlerUrlSourceUploadedList(); + // Init source + $sourceInstance->initSource('crawler', 'uploaded_list'); + // Return the prepared instance return $sourceInstance; } -- 2.39.5