Added crawler stuff:
authorRoland Haeder <roland@mxchange.org>
Mon, 12 May 2014 19:21:19 +0000 (21:21 +0200)
committerRoland Haeder <roland@mxchange.org>
Mon, 12 May 2014 19:56:45 +0000 (21:56 +0200)
- Added scanner class/interface and task class.
- Added a scanner class for looking for uploaded lists. They must be located in
  data/url_lists/ .
- Rewrote both factories (scanner, source) to use createObjectByConfiguredName()
  instead of calling createObjectByName() directly. This allows configuring
  another class if you don't like the original implementation.

Signed-off-by: Roland Haeder <roland@mxchange.org>
24 files changed:
application/hub/config.php
application/hub/interfaces/scanner/.htaccess [new file with mode: 0644]
application/hub/interfaces/scanner/class_Scanner.php [new file with mode: 0644]
application/hub/interfaces/source/urls/class_UrlSource.php
application/hub/main/factories/scanner/.htaccess [new file with mode: 0644]
application/hub/main/factories/scanner/class_ScannerObjectFactory.php [new file with mode: 0644]
application/hub/main/factories/source/url/class_UrlSourceObjectFactory.php
application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
application/hub/main/scanner/.htaccess [new file with mode: 0644]
application/hub/main/scanner/class_ [new file with mode: 0644]
application/hub/main/scanner/class_BaseScanner.php [new file with mode: 0644]
application/hub/main/scanner/crawler/.htaccess [new file with mode: 0644]
application/hub/main/scanner/crawler/class_Crawler [new file with mode: 0644]
application/hub/main/scanner/crawler/uploaded_list/.htaccess [new file with mode: 0644]
application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php [new file with mode: 0644]
application/hub/main/source/urls/class_CrawlerUrlSource
application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php
application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php
application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php
application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php
application/hub/main/tasks/crawler/scanner/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/scanner/class_Crawler [new file with mode: 0644]
application/hub/main/tasks/crawler/scanner/class_CrawlerUploadedListScannerTask.php [new file with mode: 0644]
docs/TODOs.txt

index ccd06b2e417f26daffbad6f84a2f3a7430935585..8536140c1464949e4c37443259f532ac7065177f 100644 (file)
@@ -983,9 +983,24 @@ $cfg->setConfigEntry('task_miner_test_genesis_block_producer_interval_delay', 10
 // CFG: TASK-MINER-TEST-GENESIC-BLOCK-PRODUCER-MAX-RUNS
 $cfg->setConfigEntry('task_miner_test_genesis_block_producer_max_runs', 1);
 
+// CFG: MINER-NODE-COMMUNICATOR-TASK-CLASS
+$cfg->setConfigEntry('miner_node_communicator_task_class', 'MinerNodeCommunicatorTask');
+
+// CFG: TASK-MINER-NODE-COMMUNICATOR-STARTUP-DELAY
+$cfg->setConfigEntry('task_miner_node_communicator_startup_delay', 500);
+
+// CFG: TASK-MINER-NODE-COMMUNICATOR-INTERVAL-DELAY
+$cfg->setConfigEntry('task_miner_node_communicator_interval_delay', 250);
+
+// CFG: TASK-MINER-NODE-COMMUNICATOR-MAX-RUNS
+$cfg->setConfigEntry('task_miner_node_communicator_max_runs', 0);
+
 // CFG: MINER-TEST-GENESIS-BLOCK-PRODUCER-CLASS
 $cfg->setConfigEntry('miner_test_genesis_block_producer_class', 'MinerTestGenesisBlockProducer');
 
+// CFG: MINER-NODE-COMMUNICATOR-CLASS
+$cfg->setConfigEntry('miner_node_communicator_class', 'MinerNodeCommunicator');
+
 ///////////////////////////////////////////////////////////////////////////////
 //                        Cruncher configuration
 ///////////////////////////////////////////////////////////////////////////////
@@ -1217,6 +1232,21 @@ $cfg->setConfigEntry('crawler_booting_state_class', 'CrawlerBootingState');
 // CFG: COMMUNICATOR-INIT-STATE-CLASS
 $cfg->setConfigEntry('communicator_init_state_class', 'CommunicatorInitState');
 
+// CFG: CRAWLER-UPLOADED-LIST-SCANNER
+$cfg->setConfigEntry('crawler_uploaded_list_scanner', 'CrawlerUploadedListScanner');
+
+// CFG: CRAWLER-URL-SOURCE-LOCAL-START-CLASS
+$cfg->setConfigEntry('crawler_url_source_local_start_class', 'CrawlerUrlSourceLocalStart');
+
+// CFG: CRAWLER-URL-SOURCE-UPLOADED-LIST-CLASS
+$cfg->setConfigEntry('crawler_url_source_uploaded_list_class', 'CrawlerUrlSourceUploadedList');
+
+// CFG: CRAWLER-URL-SOURCE-RSS-START-CLASS
+$cfg->setConfigEntry('crawler_url_source_rss_start_class', 'CrawlerUrlSourceRssStart');
+
+// CFG: CRAWLER-URL-SOURCE-FOUND-RSS-CLASS
+$cfg->setConfigEntry('crawler_url_source_found_rss_class', 'CrawlerUrlSourceFoundRss');
+
 // CFG: CRAWLER-NODE-COMMUNICATOR-TASK-CLASS
 $cfg->setConfigEntry('crawler_node_communicator_task_class', 'CrawlerNodeCommunicatorTask');
 
@@ -1373,6 +1403,18 @@ $cfg->setConfigEntry('task_crawler_url_source_found_rss_interval_delay', 150);
 // CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-MAX-RUNS
 $cfg->setConfigEntry('task_crawler_url_source_found_rss_max_runs', 0);
 
+// CFG: CRAWLER-UPLOADED-LIST-SCANNER-TASK-CLASS
+$cfg->setConfigEntry('crawler_uploaded_list_scanner_task_class', 'CrawlerUploadedListScannerTask');
+
+// CFG: TASK-CRAWLER-UPLOADED-LIST-SCANNER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_uploaded_list_scanner_startup_delay', 500);
+
+// CFG: TASK-CRAWLER-UPLOADED-LIST-SCANNER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_uploaded_list_scanner_interval_delay', 1000);
+
+// CFG: TASK-CRAWLER-UPLOADED-LIST-SCANNER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_uploaded_list_scanner_max_runs', 0);
+
 ///////////////////////////////////////////////////////////////////////////////
 //                            HTTP Configuration
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/application/hub/interfaces/scanner/.htaccess b/application/hub/interfaces/scanner/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/interfaces/scanner/class_Scanner.php b/application/hub/interfaces/scanner/class_Scanner.php
new file mode 100644 (file)
index 0000000..7d697a0
--- /dev/null
@@ -0,0 +1,34 @@
+<?php
+/**
+ * An interface for scanners
+ *
+ * @author             Roland Haeder <webmaster@shipsimu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.shipsimu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+interface Scanner extends FrameworkInterface {
+       /**
+        * Runs the scanner (please no loops here)
+        *
+        * @return      void
+        */
+       function execute ();
+}
+
+// [EOF]
+?>
index 03d51cac7cd80009d4f5ee0cbf35f5582f8a7a9c..19bba42a4fb617381ad734456f2c730d8989da79 100644 (file)
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 interface UrlSource extends Source {
+       /**
+        * Processes entries in the stack.
+        *
+        * @return      void
+        */
+       function processStack ();
 }
 
 // [EOF]
diff --git a/application/hub/main/factories/scanner/.htaccess b/application/hub/main/factories/scanner/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/factories/scanner/class_ScannerObjectFactory.php b/application/hub/main/factories/scanner/class_ScannerObjectFactory.php
new file mode 100644 (file)
index 0000000..c92d3bc
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A factory class for scanners
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class ScannerObjectFactory extends ObjectFactory {
+       /**
+        * Protected constructor, passes this class' name to the parent constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Returns a registry-cached Scanner instance (one per scanner type) for the given task
+        *
+        * @param       $taskInstance           An instance of a class with a Taskable interface
+        * @return      $scannerInstance        An instance of a Scanner class
+        */
+       public static final function createScannerInstance (Taskable $taskInstance) {
+               // Get new factory instance (NOTE(review): not used below - confirm it is still needed)
+               $factoryInstance = new ScannerObjectFactory();
+
+               // Get task handler instance
+               $handlerInstance = Registry::getRegistry()->getInstance('task_handler');
+
+               // Get scanner type for given task instance by searching it in task handler
+               $scannerType = $handlerInstance->searchTask($taskInstance);
+
+               // Only let registered tasks pass (an empty search result fails the assertion)
+               assert(!empty($scannerType));
+
+               // Is a scanner of this type already registered?
+               if (Registry::getRegistry()->instanceExists('scanner_' . $scannerType)) {
+                       // Reuse the scanner instance from the registry
+                       $scannerInstance = Registry::getRegistry()->getInstance('scanner_' . $scannerType);
+               } else {
+                       // Create the scanner, the scanner type itself is passed as configured name
+                       $scannerInstance = ObjectFactory::createObjectByConfiguredName($scannerType);
+
+                       // Add it to the registry under 'scanner_<type>' so later calls reuse it
+                       Registry::getRegistry()->addInstance('scanner_' . $scannerType, $scannerInstance);
+               }
+
+               // Return the instance
+               return $scannerInstance;
+       }
+}
+
+// [EOF]
+?>
index 76ac831354dc1bfe2286b7b28db28d414329940d..42b6056296989cbe82a11452b8c2f09dca52189d 100644 (file)
@@ -45,25 +45,22 @@ class UrlSourceObjectFactory extends ObjectFactory {
                // Get task handler instance
                $handlerInstance = Registry::getRegistry()->getInstance('task_handler');
 
-               // The default node-mode is from our configuration
+               // Get source type by looking for given task instance in task handler
                $sourceType = $handlerInstance->searchTask($taskInstance);
 
                // Only let registered tasks pass
                assert(!empty($sourceType));
 
                // If there is no handler?
-               if (Registry::getRegistry()->instanceExists('node_' . $sourceType)) {
+               if (Registry::getRegistry()->instanceExists('source_' . $sourceType)) {
                        // Get handler from registry
-                       $sourceInstance = Registry::getRegistry()->getInstance('node_' . $sourceType);
+                       $sourceInstance = Registry::getRegistry()->getInstance('source_' . $sourceType);
                } else {
-                       // Now convert the source type into a class name
-                       $className = $factoryInstance->convertToClassName($sourceType);
-
-                       // Get the node instance
-                       $sourceInstance = ObjectFactory::createObjectByName($className);
+                       // Get the source instance
+                       $sourceInstance = ObjectFactory::createObjectByConfiguredName($sourceType . '_class');
 
                        // Add it to the registry
-                       Registry::getRegistry()->addInstance('node_' . $sourceType, $sourceInstance);
+                       Registry::getRegistry()->addInstance('source_' . $sourceType, $sourceInstance);
                }
 
                // Return the instance
index 4ba0ed0b178ef9b93da8490fe0038aa423281d76..46a84ffef9ef7c10ae24524dbb7ee479332fdb98 100644 (file)
@@ -117,6 +117,10 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class');
                $handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance);
 
+               // 14) Uploaded list scanner (checks for wanted files)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class');
+               $handlerInstance->registerTask('crawler_uploaded_list_scanner', $taskInstance);
+
                // Put the task handler in registry
                Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
        }
diff --git a/application/hub/main/scanner/.htaccess b/application/hub/main/scanner/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/scanner/class_ b/application/hub/main/scanner/class_
new file mode 100644 (file)
index 0000000..6b8ba17
--- /dev/null
@@ -0,0 +1,60 @@
+<?php
+/**
+ * A ??? scanner class
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class ???Scanner extends BaseScanner implements Scanner, Registerable {
+       /**
+        * Protected constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $scannerInstance        An instance of a Scanner class
+        */
+       public final static function create???Scanner () {
+               // Get new instance
+               $scannerInstance = new ???Scanner();
+
+               // Return the prepared instance
+               return $scannerInstance;
+       }
+
+       /**
+        * Runs the scanner (please no loops here)
+        *
+        * @return      void
+        * @todo        0% done
+        */
+       public function execute () {
+               $this->partialStub('Please implement this method.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/scanner/class_BaseScanner.php b/application/hub/main/scanner/class_BaseScanner.php
new file mode 100644 (file)
index 0000000..d56b387
--- /dev/null
@@ -0,0 +1,38 @@
+<?php
+/**
+ * A general Scanner class
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class BaseScanner extends BaseHubSystem {
+       /**
+        * Protected constructor
+        *
+        * @param       $className      Name of the class
+        * @return      void
+        */
+       protected function __construct ($className) {
+               // Call parent constructor
+               parent::__construct($className);
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/scanner/crawler/.htaccess b/application/hub/main/scanner/crawler/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/scanner/crawler/class_Crawler b/application/hub/main/scanner/crawler/class_Crawler
new file mode 100644 (file)
index 0000000..d02f28c
--- /dev/null
@@ -0,0 +1,60 @@
+<?php
+/**
+ * A ??? scanner class for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class Crawler???Scanner extends BaseScanner implements Scanner, Registerable {
+       /**
+        * Protected constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $scannerInstance        An instance of a Scanner class
+        */
+       public final static function createCrawler???Scanner () {
+               // Get new instance
+               $scannerInstance = new Crawler???Scanner();
+
+               // Return the prepared instance
+               return $scannerInstance;
+       }
+
+       /**
+        * Runs the scanner (please no loops here)
+        *
+        * @return      void
+        * @todo        0% done
+        */
+       public function execute () {
+               $this->partialStub('Please implement this method.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/scanner/crawler/uploaded_list/.htaccess b/application/hub/main/scanner/crawler/uploaded_list/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php b/application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php
new file mode 100644 (file)
index 0000000..1dfac0f
--- /dev/null
@@ -0,0 +1,60 @@
+<?php
+/**
+ * An UploadedList scanner class for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUploadedListScanner extends BaseScanner implements Scanner, Registerable {
+       /**
+        * Protected constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $scannerInstance        An instance of a Scanner class
+        */
+       public final static function createCrawlerUploadedListScanner () {
+               // Get new instance
+               $scannerInstance = new CrawlerUploadedListScanner();
+
+               // Return the prepared instance
+               return $scannerInstance;
+       }
+
+       /**
+        * Runs the scanner (please no loops here)
+        *
+        * @return      void
+        * @todo        0% done
+        */
+       public function execute () {
+               $this->partialStub('Please implement this method.');
+       }
+}
+
+// [EOF]
+?>
index eb94110240d79738252aec5b3601af735c450eae..c330af0a4e20b0684ffdfb28a6662b55120d4ff8 100644 (file)
@@ -44,6 +44,16 @@ class CrawlerUrlSource??? extends BaseSource implements UrlSource, Registerable
                // Return the prepared instance
                return $sourceInstance;
        }
+
+       /**
+        * Processes entries in the stack.
+        *
+        * @return      void
+        * @todo        0% done
+        */
+       public function processStack () {
+               $this->partialStub('Please implement this method.');
+       }
 }
 
 // [EOF]
index 6abf586a6c836a145e81b00b6d3eb10ef35aaad6..08a4595f6dbf8698c2244e96465dd69c3e0e1676 100644 (file)
@@ -44,6 +44,16 @@ class CrawlerUrlSourceFoundRss extends BaseSource implements UrlSource, Register
                // Return the prepared instance
                return $sourceInstance;
        }
+
+       /**
+        * Processes entries in the stack.
+        *
+        * @return      void
+        * @todo        0% done
+        */
+       public function processStack () {
+               $this->partialStub('Please implement this method.');
+       }
 }
 
 // [EOF]
index f2b5e15570b06d00c64dfd72900ac3ab20ffc990..67279be0468df23b87be6fe538575a4704d7f09d 100644 (file)
@@ -44,6 +44,16 @@ class CrawlerUrlSourceLocalStart extends BaseSource implements UrlSource, Regist
                // Return the prepared instance
                return $sourceInstance;
        }
+
+       /**
+        * Processes entries in the stack.
+        *
+        * @return      void
+        * @todo        0% done
+        */
+       public function processStack () {
+               $this->partialStub('Please implement this method.');
+       }
 }
 
 // [EOF]
index c0f880ee72529d3a860d60a78bb8cc6de578e748..7378c0c0bc74537c06cf70e5d83fde0cb4b42d14 100644 (file)
@@ -44,6 +44,16 @@ class CrawlerUrlSourceRssStart extends BaseSource implements UrlSource, Register
                // Return the prepared instance
                return $sourceInstance;
        }
+
+       /**
+        * Processes entries in the stack.
+        *
+        * @return      void
+        * @todo        0% done
+        */
+       public function processStack () {
+               $this->partialStub('Please implement this method.');
+       }
 }
 
 // [EOF]
index ecbbe593895f839531099714c4d59baec397ef12..5ef26cdaf0a2f36d1aa253e18ff139911b2d369a 100644 (file)
@@ -44,6 +44,16 @@ class CrawlerUrlSourceUploadedList extends BaseSource implements UrlSource, Regi
                // Return the prepared instance
                return $sourceInstance;
        }
+
+       /**
+        * Processes entries in the stack.
+        *
+        * @return      void
+        * @todo        0% done
+        */
+       public function processStack () {
+               $this->partialStub('Please implement this method.');
+       }
 }
 
 // [EOF]
diff --git a/application/hub/main/tasks/crawler/scanner/.htaccess b/application/hub/main/tasks/crawler/scanner/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/scanner/class_Crawler b/application/hub/main/tasks/crawler/scanner/class_Crawler
new file mode 100644 (file)
index 0000000..359c41d
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A ??? scanner task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class Crawler???ScannerTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor, passes this class' name to the parent constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $taskInstance   An instance of a Visitable class
+        */
+       public final static function createCrawler???ScannerTask () {
+               // Get new instance
+               $taskInstance = new Crawler???ScannerTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts the visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task by running the scanner which the scanner factory returns for this task
+        *
+        * @return      void
+        */
+       public function executeTask () {
+               // Call factory (ScannerObjectFactory, there is no ScannerFactory) for a scanner instance and execute it
+               ScannerObjectFactory::createScannerInstance($this)->execute();
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/scanner/class_CrawlerUploadedListScannerTask.php b/application/hub/main/tasks/crawler/scanner/class_CrawlerUploadedListScannerTask.php
new file mode 100644 (file)
index 0000000..cdb3c0d
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * An UploadedList scanner task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerUploadedListScannerTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor, passes this class' name to the parent constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $taskInstance   An instance of a Visitable class
+        */
+       public final static function createCrawlerUploadedListScannerTask () {
+               // Get new instance
+               $taskInstance = new CrawlerUploadedListScannerTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts the visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task by running the scanner which ScannerObjectFactory returns for this task
+        *
+        * @return      void
+        */
+       public function executeTask () {
+               // Call factory for a scanner instance and execute it
+               ScannerObjectFactory::createScannerInstance($this)->execute();
+       }
+}
+
+// [EOF]
+?>
index 1b260bfa2285ea3f204f819d0330e99294f7ab63..8ef8fa236e9a62e2d56532bbb7e2f687ffb34518 100644 (file)
@@ -14,7 +14,7 @@
 ./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:122:      * @todo        Add minimum/maximum age limitations
 ./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:132:      * @todo        Add timestamp to dataset instance
 ./application/hub/main/chains/class_PackageFilterChain.php:54:  * @todo        This may be slow if a message with a lot tags arrived
-./application/hub/main/class_BaseHubSystem.php:528:                            // @TODO On some systems it is 134, on some 107?
+./application/hub/main/class_BaseHubSystem.php:576:                            // @TODO On some systems it is 134, on some 107?
 ./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:107:        * @todo        Should we add some more filters?
 ./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:58:         * @todo        Try to create a AptProxyActivationTask or so
 ./application/hub/main/commands/console/class_HubConsoleChatCommand.php:107:    * @todo        Should we add some more filters?
 ./application/hub/main/commands/console/class_HubConsoleMainCommand.php:58:     * @todo        Try to create a HubActivationTask or so
 ./application/hub/main/commands/console/class_HubConsoleMinerCommand.php:107:   * @todo        Should we add some more filters?
 ./application/hub/main/commands/console/class_HubConsoleMinerCommand.php:58:    * @todo        Try to create a MinerActivationTask or so
+./application/hub/main/crawler/class_BaseNodeCrawler.php:59:    * @todo        0% done
 ./application/hub/main/cruncher/class_BaseHubCruncher.php:200:  * @todo        Try to make this method more generic so we can move it in BaseFrameworkSystem
 ./application/hub/main/cruncher/mcrypt/class_HubMcryptCruncher.php:108:         * @todo        Implement this method
 ./application/hub/main/cruncher/mcrypt/class_HubMcryptCruncher.php:138:         * @todo        0% done
 ./application/hub/main/cruncher/mcrypt/class_HubMcryptCruncher.php:98:         // @TODO Implement this method
-./application/hub/main/dht/class_BaseDht.php:129:       * @todo        Add minimum/maximum age limitations
-./application/hub/main/dht/class_BaseDht.php:163:                      // @TODO Maybe add more small checks?
-./application/hub/main/dht/class_BaseDht.php:205:       * @todo        Find out if loadDescriptorXml() can be called only once to avoid a lot methods working.
-./application/hub/main/dht/class_BaseDht.php:245:       * @todo        0% done
-./application/hub/main/dht/class_BaseDht.php:256:       * @todo        Switch flag 'accept_bootstrap'
-./application/hub/main/dht/class_BaseDht.php:89:        * @todo        Find more to do here
+./application/hub/main/dht/class_BaseDht.php:126:       * @todo        Add minimum/maximum age limitations
+./application/hub/main/dht/class_BaseDht.php:160:                      // @TODO Maybe add more small checks?
+./application/hub/main/dht/class_BaseDht.php:202:       * @todo        Find out if loadDescriptorXml() can be called only once to avoid a lot methods working.
+./application/hub/main/dht/class_BaseDht.php:242:       * @todo        0% done
+./application/hub/main/dht/class_BaseDht.php:253:       * @todo        Switch flag 'accept_bootstrap'
+./application/hub/main/dht/class_BaseDht.php:86:        * @todo        Find more to do here
 ./application/hub/main/dht/node/class_NodeDhtFacade.php:61:     * @todo        Does this data need to be enriched with more meta data?
 ./application/hub/main/discovery/package/class_PackageRecipientDiscovery.php:86:        * @todo        Add some validation of recipient field, e.g. ip:port is found
 ./application/hub/main/discovery/package/class_PackageRecipientDiscovery.php:87:        * @todo        The if() does only check for TCP, not UDP, e.g. try to get a $handlerInstance here
@@ -74,7 +75,7 @@
 ./application/hub/main/filter/shutdown/node/class_NodeShutdownTaskHandlerFilter.php:55:         * @todo        0% done
 ./application/hub/main/filter/task/apt-proxy/class_AptProxyTaskHandlerInitializerFilter.php:55:         * @todo        5% done
 ./application/hub/main/filter/task/chat/class_ChatTaskHandlerInitializerFilter.php:55:  * @todo        5% done
-./application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php:55:    * @todo        5% done
+./application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php:55:    * @todo        10% done
 ./application/hub/main/filter/task/cruncher/class_CruncherTaskHandlerInitializerFilter.php:55:  * @todo        5% done
 ./application/hub/main/filter/task/miner/class_MinerTaskHandlerInitializerFilter.php:55:        * @todo        5% done
 ./application/hub/main/filter/task/node/class_NodeTaskHandlerInitializerFilter.php:55:  * @todo        Maybe some more tasks needs to be added?
 ./application/hub/main/miner/class_BaseHubMiner.php:200:        * @todo        Try to make this method more generic so we can move it in BaseFrameworkSystem
 ./application/hub/main/nodes/boot/class_HubBootNode.php:110:           // @TODO Add some filters here
 ./application/hub/main/nodes/boot/class_HubBootNode.php:58:     * @todo        add some more special bootstrap things for this boot node
-./application/hub/main/nodes/class_BaseHubNode.php:120:         * @todo        Make this code more generic and move it to CryptoHelper or
-./application/hub/main/nodes/class_BaseHubNode.php:417:         * @todo        Try to make this method more generic so we can move it in BaseFrameworkSystem
-./application/hub/main/nodes/class_BaseHubNode.php:457:         * @todo        Change the first if() block to check for a specific state
-./application/hub/main/nodes/class_BaseHubNode.php:663:         * @todo        Add checking if this node has been announced to the sender node
-./application/hub/main/nodes/class_BaseHubNode.php:683:         * @todo        Add checking if this node has been announced to the sender node
-./application/hub/main/nodes/class_BaseHubNode.php:767:         * @todo        Find more to do here
-./application/hub/main/nodes/class_BaseHubNode.php:780:         * @todo        Handle thrown exception
+./application/hub/main/nodes/class_BaseHubNode.php:117:         * @todo        Make this code more generic and move it to CryptoHelper or
+./application/hub/main/nodes/class_BaseHubNode.php:414:         * @todo        Try to make this method more generic so we can move it in BaseFrameworkSystem
+./application/hub/main/nodes/class_BaseHubNode.php:454:         * @todo        Change the first if() block to check for a specific state
+./application/hub/main/nodes/class_BaseHubNode.php:660:         * @todo        Add checking if this node has been announced to the sender node
+./application/hub/main/nodes/class_BaseHubNode.php:680:         * @todo        Add checking if this node has been announced to the sender node
+./application/hub/main/nodes/class_BaseHubNode.php:764:         * @todo        Find more to do here
+./application/hub/main/nodes/class_BaseHubNode.php:777:         * @todo        Handle thrown exception
 ./application/hub/main/nodes/list/class_HubListNode.php:58:     * @todo        Implement more bootstrap steps
 ./application/hub/main/nodes/list/class_HubListNode.php:79:            // @TODO Add some filters here
 ./application/hub/main/nodes/list/class_HubListNode.php:88:     * @todo        0% done
 ./application/hub/main/producer/cruncher/work_units/class_CruncherTestUnitProducer.php:79:      * @todo        ~60% done
 ./application/hub/main/producer/cruncher/work_units/class_CruncherTestUnitProducer.php:88:                     // @TODO Unfinished work here
 ./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:53:        * @todo        0% done
-./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:64:        * @todo        0% done
+./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:63:        * @todo        0% done
+./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:74:        * @todo        0% done
 ./application/hub/main/registry/socket/class_SocketRegistry.php:71:            // @TODO Tested again base class, rewrite it to a generic interface!
 ./application/hub/main/resolver/state/peer/class_PeerStateResolver.php:59:      * @todo        ~30% done
+./application/hub/main/states/crawler/active/class_CrawlerActiveState.php:60:   * @todo        0% done
+./application/hub/main/states/crawler/booting/class_CrawlerBootingState.php:60:         * @todo        0% done
+./application/hub/main/states/crawler/init/class_CrawlerInitState.php:70:       * @todo        ~30% done
 ./application/hub/main/states/dht/class_BaseDhtState.php:10: * @todo           Create generic DHT interface
 ./application/hub/main/states/node/active/class_NodeActiveState.php:75:         * @todo        We might want to move some calls to this method to fill it with life
 ./application/hub/main/states/node/init/class_NodeInitState.php:60:     * @todo        We might want to move some calls to this method to fill it with life
 ./application/hub/main/streams/raw_data/input/class_RawDataInputStream.php:58:  * @todo        Do we need to do something more here?
 ./application/hub/main/tasks/apt-proxy/class_AptProxyListenerTask.php:63:       * @todo        0%
 ./application/hub/main/tasks/chat/class_ChatTelnetListenerTask.php:63:  * @todo        0%
+./application/hub/main/tasks/crawler/communicator/class_CrawlerNodeCommunicatorTask.php:53:     * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/document_parser/class_CrawlerDocumentParserTask.php:53:    * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/document_parser/class_CrawlerDocumentParserTask.php:64:    * @todo        0%
+./application/hub/main/tasks/crawler/mime_sniffer/class_CrawlerMimeSnifferTask.php:53:  * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/mime_sniffer/class_CrawlerMimeSnifferTask.php:64:  * @todo        0%
+./application/hub/main/tasks/crawler/ping/class_CrawlerPingTask.php:53:         * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/ping/class_CrawlerPingTask.php:64:         * @todo        0%
+./application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php:53:      * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php:64:      * @todo        0%
+./application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php:53:        * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php:64:        * @todo        0%
+./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:53:      * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:64:      * @todo        0%
+./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:53:   * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:64:   * @todo        0%
+./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:53:         * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:64:         * @todo        0%
+./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php:53:      * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php:53:    * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php:53:      * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php:53:  * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/miner/communicator/class_MinerNodeCommunicatorTask.php:53:         * @todo        Maybe visit some sub-objects
 ./application/hub/main/tasks/network/class_NetworkPackageReaderTask.php:63:     * @todo        Also visit some sub-objects?
 ./application/hub/main/tasks/network/class_NetworkPackageWriterTask.php:59:     * @todo        Also visit some sub-objects?
 ./application/hub/main/tasks/node/chunks/class_NodeChunkAssemblerTask.php:59:   * @todo        Also visit some sub-objects?