git.mxchange.org Git - hub.git/commitdiff
Continued with crawler:
author Roland Haeder <roland@mxchange.org>
Thu, 15 May 2014 19:45:28 +0000 (21:45 +0200)
committer Roland Haeder <roland@mxchange.org>
Thu, 15 May 2014 19:59:47 +0000 (21:59 +0200)
- added BaseUrlSource class with generic file-based stack initialization.
- added missing configuration entries.

Signed-off-by: Roland Haeder <roland@mxchange.org>
application/hub/config.php
application/hub/main/source/class_BaseUrlSource.php [new file with mode: 0644]
application/hub/main/source/urls/class_CrawlerUrlSource
application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php
application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php
application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php
application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php

index 8536140c1464949e4c37443259f532ac7065177f..6d70ce50c7c93633e57f2d9a2e2686baf90bb9a3 100644 (file)
@@ -1193,6 +1193,12 @@ $cfg->setConfigEntry('task_chat_telnet_listener_max_runs', 0);
 //                            Crawler Configuration
 ///////////////////////////////////////////////////////////////////////////////
 
+// CFG: BASE-FILE-STACKS-PATH
+$cfg->setConfigEntry('base_file_stacks_path', 'data/stacks');
+
+// CFG: FILE-STACKS-EXTENSION
+$cfg->setConfigEntry('file_stacks_extension', 'stack');
+
 // CFG: HUBCONSOLE-CMD-CHAT-RESOLVER-CLASS
 $cfg->setConfigEntry('hubconsole_cmd_crawler_resolver_class', 'HubConsoleCommandResolver');
 
@@ -1250,6 +1256,18 @@ $cfg->setConfigEntry('crawler_url_source_found_rss_class', 'CrawlerUrlSourceFoun
 // CFG: CRAWLER-NODE-COMMUNICATOR-TASK-CLASS
 $cfg->setConfigEntry('crawler_node_communicator_task_class', 'CrawlerNodeCommunicatorTask');
 
+// CFG: CRAWLER-URL-LOCAL-START-STACK-CLASS
+$cfg->setConfigEntry('crawler_url_local_start_stack_class', 'FiFoFileStack');
+
+// CFG: CRAWLER-URL-UPLOADED-LIST-STACK-CLASS
+$cfg->setConfigEntry('crawler_url_uploaded_list_stack_class', 'FiFoFileStack');
+
+// CFG: CRAWLER-URL-RSS-START-STACK-CLASS
+$cfg->setConfigEntry('crawler_url_rss_start_stack_class', 'FiFoFileStack');
+
+// CFG: CRAWLER-URL-FOUND-RSS-STACK-CLASS
+$cfg->setConfigEntry('crawler_url_found_rss_stack_class', 'FiFoFileStack');
+
 // CFG: TASK-CRAWLER-NODE-COMMUNICATOR-STARTUP-DELAY
 $cfg->setConfigEntry('task_crawler_node_communicator_startup_delay', 500);
 
diff --git a/application/hub/main/source/class_BaseUrlSource.php b/application/hub/main/source/class_BaseUrlSource.php
new file mode 100644 (file)
index 0000000..391346d
--- /dev/null
@@ -0,0 +1,60 @@
+<?php
+/**
+ * A general URL source class
+ *
+ * @author             Roland Haeder <webmaster@shipsimu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.shipsimu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class BaseUrlSource extends BaseSource {
+       /**
+        * Protected constructor, forwards the class name to the parent constructor
+        *
+        * @param       $className      Name of the class
+        * @return      void
+        */
+       protected function __construct ($className) {
+               // Call parent constructor
+               parent::__construct($className);
+       }
+
+       /**
+        * Initializes this source by creating its stack and setting it as stacker instance
+        *
+        * @param       $prefix                 Prefix of the stack class' configuration entry (e.g. 'crawler')
+        * @param       $sourceName             Name of this source, also used as the stack's file name
+        * @return      void
+        */
+       protected function initSource ($prefix, $sourceName) {
+               // Construct stack file name: <base_file_stacks_path>/<sourceName>.<file_stacks_extension>
+               $stackFileName = sprintf('%s/%s.%s',
+                       $this->getConfigInstance()->getConfigEntry('base_file_stacks_path'),
+                       $sourceName,
+                       $this->getConfigInstance()->getConfigEntry('file_stacks_extension')
+               );
+
+               // Create the stack class configured as '<prefix>_url_<sourceName>_stack_class', handing over the file name
+               $stackInstance = ObjectFactory::createObjectByConfiguredName($prefix . '_url_' . $sourceName . '_stack_class', array($stackFileName));
+
+               // Set the created stack in this source
+               $this->setStackerInstance($stackInstance);
+       }
+}
+
+// [EOF]
+?>
index c330af0a4e20b0684ffdfb28a6662b55120d4ff8..1fef9c7c941b25c17b37696b48446a356ec0d7dd 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSource??? extends BaseSource implements UrlSource, Registerable {
+class CrawlerUrlSource??? extends BaseUrlSource implements UrlSource, Registerable {
        /**
         * Protected constructor
         *
@@ -41,6 +41,9 @@ class CrawlerUrlSource??? extends BaseSource implements UrlSource, Registerable
                // Get new instance
                $sourceInstance = new CrawlerUrlSource???();
 
+               // Init source
+               $sourceInstance->initSource('crawler', '!!!');
+
                // Return the prepared instance
                return $sourceInstance;
        }
index 08a4595f6dbf8698c2244e96465dd69c3e0e1676..9240c8456da37811a0dfad2a2fbef7214223593e 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSourceFoundRss extends BaseSource implements UrlSource, Registerable {
+class CrawlerUrlSourceFoundRss extends BaseUrlSource implements UrlSource, Registerable {
        /**
         * Protected constructor
         *
@@ -41,6 +41,9 @@ class CrawlerUrlSourceFoundRss extends BaseSource implements UrlSource, Register
                // Get new instance
                $sourceInstance = new CrawlerUrlSourceFoundRss();
 
+               // Init source
+               $sourceInstance->initSource('crawler', 'found_rss');
+
                // Return the prepared instance
                return $sourceInstance;
        }
index 67279be0468df23b87be6fe538575a4704d7f09d..1ccc9efa99d47eb0c44a5e2dce567858787cc868 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSourceLocalStart extends BaseSource implements UrlSource, Registerable {
+class CrawlerUrlSourceLocalStart extends BaseUrlSource implements UrlSource, Registerable {
        /**
         * Protected constructor
         *
@@ -41,6 +41,9 @@ class CrawlerUrlSourceLocalStart extends BaseSource implements UrlSource, Regist
                // Get new instance
                $sourceInstance = new CrawlerUrlSourceLocalStart();
 
+               // Init source
+               $sourceInstance->initSource('crawler', 'local_start');
+
                // Return the prepared instance
                return $sourceInstance;
        }
index 7378c0c0bc74537c06cf70e5d83fde0cb4b42d14..12177e7add1526f30d3b36d6f5d4d485f6ecf25f 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSourceRssStart extends BaseSource implements UrlSource, Registerable {
+class CrawlerUrlSourceRssStart extends BaseUrlSource implements UrlSource, Registerable {
        /**
         * Protected constructor
         *
@@ -41,6 +41,9 @@ class CrawlerUrlSourceRssStart extends BaseSource implements UrlSource, Register
                // Get new instance
                $sourceInstance = new CrawlerUrlSourceRssStart();
 
+               // Init source
+               $sourceInstance->initSource('crawler', 'rss_start');
+
                // Return the prepared instance
                return $sourceInstance;
        }
index 5ef26cdaf0a2f36d1aa253e18ff139911b2d369a..96d1ba2d6d8d7be7c2ebe9422eb30d3f2560dbff 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSourceUploadedList extends BaseSource implements UrlSource, Registerable {
+class CrawlerUrlSourceUploadedList extends BaseUrlSource implements UrlSource, Registerable {
        /**
         * Protected constructor
         *
@@ -41,6 +41,9 @@ class CrawlerUrlSourceUploadedList extends BaseSource implements UrlSource, Regi
                // Get new instance
                $sourceInstance = new CrawlerUrlSourceUploadedList();
 
+               // Init source
+               $sourceInstance->initSource('crawler', 'uploaded_list');
+
                // Return the prepared instance
                return $sourceInstance;
        }