git.mxchange.org Git - hub.git/commitdiff
Merge branch 'refacuring/protocol_handler' into latest-core/crawler
author Roland Haeder <roland@mxchange.org>
Sat, 21 Mar 2015 21:17:13 +0000 (22:17 +0100)
committer Roland Haeder <roland@mxchange.org>
Sat, 21 Mar 2015 21:17:18 +0000 (22:17 +0100)
Signed-off-by: Roland Haeder <roland@mxchange.org>
32 files changed:
application/hub/config.php
application/hub/interfaces/source/urls/class_UrlSource.php
application/hub/main/filter/task/apt-proxy/class_AptProxyTaskHandlerInitializerFilter.php
application/hub/main/filter/task/chat/class_ChatTaskHandlerInitializerFilter.php
application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
application/hub/main/filter/task/cruncher/class_CruncherTaskHandlerInitializerFilter.php
application/hub/main/filter/task/miner/class_MinerTaskHandlerInitializerFilter.php
application/hub/main/filter/task/node/class_NodeTaskHandlerInitializerFilter.php
application/hub/main/resolver/protocol/tcp/class_TcpProtocolResolver.php
application/hub/main/source/class_BaseUrlSource.php
application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php
application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php
application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php
application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php
application/hub/main/tasks/crawler/class_BaseUrlSourceTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/url_crawler/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/url_crawler/local/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/url_crawler/remote/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/url_getter/.htaccess [deleted file]
application/hub/main/tasks/crawler/url_getter/local/.htaccess [deleted file]
application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php [deleted file]
application/hub/main/tasks/crawler/url_getter/remote/.htaccess [deleted file]
application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php [deleted file]
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php
application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php
core
docs/TODOs.txt

index 8130471e56ce121d81a3c59730a3518df67c2af7..c516e96c4b6ac145dc76b9a899dafc5d67959e2c 100644 (file)
@@ -1280,6 +1280,9 @@ $cfg->setConfigEntry('crawler_url_source_rss_start_class', 'CrawlerRssStartUrlSo
 // CFG: CRAWLER-URL-SOURCE-FOUND-RSS-CLASS
 $cfg->setConfigEntry('crawler_url_source_found_rss_class', 'CrawlerFoundRssUrlSource');
 
+// CFG: CRAWLER-URL-STACKS
+$cfg->setConfigEntry('crawler_url_stacks', 'local_start:uploaded_list:rss_start:found_rss');
+
 // CFG: CRAWLER-NODE-COMMUNICATOR-TASK-CLASS
 $cfg->setConfigEntry('crawler_node_communicator_task_class', 'CrawlerNodeCommunicatorTask');
 
@@ -1313,6 +1316,9 @@ $cfg->setConfigEntry('crawler_uploaded_list_url_source_stack_class', 'FiFoStacke
 // CFG: STACKER-CSV-FILE-MAX-SIZE
 $cfg->setConfigEntry('stacker_csv_file_max_size', 10);
 
+// CFG: STACKER-CSV-ENTRY-MAX-SIZE
+$cfg->setConfigEntry('stacker_csv_entry_max_size', 100);
+
 // CFG: TASK-CRAWLER-NODE-COMMUNICATOR-STARTUP-DELAY
 $cfg->setConfigEntry('task_crawler_node_communicator_startup_delay', 500);
 
@@ -1322,29 +1328,29 @@ $cfg->setConfigEntry('task_crawler_node_communicator_interval_delay', 250);
 // CFG: TASK-CRAWLER-NODE-COMMUNICATOR-MAX-RUNS
 $cfg->setConfigEntry('task_crawler_node_communicator_max_runs', 0);
 
-// CFG: CRAWLER-LOCAL-URL-GETTER-TASK-CLASS
-$cfg->setConfigEntry('crawler_local_url_getter_task_class', 'CrawlerLocalUrlGetterTask');
+// CFG: CRAWLER-LOCAL-URL-CRAWLER-TASK-CLASS
+$cfg->setConfigEntry('crawler_local_url_crawler_task_class', 'CrawlerLocalUrlCrawlerTask');
 
-// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-STARTUP-DELAY
-$cfg->setConfigEntry('task_crawler_local_url_getter_startup_delay', 1500);
+// CFG: TASK-CRAWLER-LOCAL-URL-CRAWLER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_local_url_crawler_startup_delay', 1500);
 
-// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-INTERVAL-DELAY
-$cfg->setConfigEntry('task_crawler_local_url_getter_interval_delay', 200);
+// CFG: TASK-CRAWLER-LOCAL-URL-CRAWLER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_local_url_crawler_interval_delay', 200);
 
-// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-MAX-RUNS
-$cfg->setConfigEntry('task_crawler_local_url_getter_max_runs', 0);
+// CFG: TASK-CRAWLER-LOCAL-URL-CRAWLER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_local_url_crawler_max_runs', 0);
 
-// CFG: CRAWLER-REMOTE-URL-GETTER-TASK-CLASS
-$cfg->setConfigEntry('crawler_remote_url_getter_task_class', 'CrawlerRemoteUrlGetterTask');
+// CFG: CRAWLER-REMOTE-URL-CRAWLER-TASK-CLASS
+$cfg->setConfigEntry('crawler_remote_url_crawler_task_class', 'CrawlerRemoteUrlCrawlerTask');
 
-// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-STARTUP-DELAY
-$cfg->setConfigEntry('task_crawler_remote_url_getter_startup_delay', 1500);
+// CFG: TASK-CRAWLER-REMOTE-URL-CRAWLER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_remote_url_crawler_startup_delay', 1500);
 
-// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-INTERVAL-DELAY
-$cfg->setConfigEntry('task_crawler_remote_url_getter_interval_delay', 200);
+// CFG: TASK-CRAWLER-REMOTE-URL-CRAWLER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_remote_url_crawler_interval_delay', 200);
 
-// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-MAX-RUNS
-$cfg->setConfigEntry('task_crawler_remote_url_getter_max_runs', 0);
+// CFG: TASK-CRAWLER-REMOTE-URL-CRAWLER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_remote_url_crawler_max_runs', 0);
 
 // CFG: CRAWLER-REMOTE-JOB-PUBLISHER-TASK-CLASS
 $cfg->setConfigEntry('crawler_remote_job_publisher_task_class', 'CrawlerRemoteJobPublisherTask');
@@ -1481,6 +1487,9 @@ $cfg->setConfigEntry('task_crawler_uploaded_list_scanner_max_runs', 0);
 // CFG: CRAWLER-CSV-FILE-PATH
 $cfg->setConfigEntry('crawler_csv_file_path', 'data/url_lists');
 
+// CFG: CRAWLER-URL-LIST-COLUMN-SEPARATOR
+$cfg->setConfigEntry('crawler_url_list_column_separator', ',');
+
 ///////////////////////////////////////////////////////////////////////////////
 //                            HTTP Configuration
 ///////////////////////////////////////////////////////////////////////////////
index 996eb52b595fd3839cebe6b1a501650dd22a03c2..8f30d0a5f3e55e1e6b6946b1123614653bf52745 100644 (file)
  */
 interface UrlSource extends Source {
        /**
-        * Processes entries in the stack.
+        * Fills the URL stack with new entries from source
         *
         * @return      void
         */
-       function processStack ();
+       function fillUrlStack ();
+
+       /**
+        * Determines whether the stack 'urls' is empty.
+        *
+        * @return      $isEmpty        Whether the stack 'urls' is empty.
+        */
+       function isUrlStackEmpty ();
 }
 
 // [EOF]
index a395759675c41b01bc4fb4d89731765835963a3d..17e808a6276d2c15127747ab0d63ceb08d1684eb 100644 (file)
@@ -61,6 +61,9 @@ class AptProxyTaskHandlerInitializerFilter extends BaseAptProxyFilter implements
                // Get a new task handler instance
                $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
 
+               // Put the task handler in registry
+               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
                /*
                 * Register all tasks:
                 *
@@ -68,9 +71,6 @@ class AptProxyTaskHandlerInitializerFilter extends BaseAptProxyFilter implements
                 */
                $taskInstance = ObjectFactory::createObjectByConfiguredName('apt_proxy_listener_task_class');
                $handlerInstance->registerTask('apt_proxy_listener', $taskInstance);
-
-               // Put the task handler in registry
-               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
        }
 }
 
index 178f70743a4828d5bb11eefcc65e3cf3fa3d4ec8..85db533fb2ed28b4a7ab4c07525e048549cdf442 100644 (file)
@@ -61,6 +61,9 @@ class ChatTaskHandlerInitializerFilter extends BaseChatFilter implements Filtera
                // Get a new task handler instance
                $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
 
+               // Put the task handler in registry
+               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
                /*
                 * Register all tasks:
                 *
@@ -68,9 +71,6 @@ class ChatTaskHandlerInitializerFilter extends BaseChatFilter implements Filtera
                 */
                $taskInstance = ObjectFactory::createObjectByConfiguredName('chat_telnet_listener_task_class');
                $handlerInstance->registerTask('chat_telnet_listener', $taskInstance);
-
-               // Put the task handler in registry
-               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
        }
 }
 
index 46a84ffef9ef7c10ae24524dbb7ee479332fdb98..1a5ee9eb7cc8c25f42c85417088c72779b612915 100644 (file)
@@ -61,6 +61,9 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                // Get a new task handler instance
                $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
 
+               // Put the task handler in registry
+               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
                /*
                 * Register all tasks:
                 *
@@ -69,13 +72,13 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class');
                $handlerInstance->registerTask('crawler_node_communicator', $taskInstance);
 
-               // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class');
-               $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance);
+               // 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_crawler_task_class');
+               $handlerInstance->registerTask('crawler_local_url_crawler', $taskInstance);
 
-               // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index)
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class');
-               $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance);
+               // 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_crawler_task_class');
+               $handlerInstance->registerTask('crawler_remote_url_crawler', $taskInstance);
 
                // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
@@ -101,28 +104,18 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
                $handlerInstance->registerTask('crawler_ping', $taskInstance);
 
-               // 10) URL source: local start
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_local_start_task_class');
-               $handlerInstance->registerTask('crawler_url_source_local_start', $taskInstance);
-
-               // 11) URL source: uploaded list
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_uploaded_list_task_class');
-               $handlerInstance->registerTask('crawler_url_source_uploaded_list', $taskInstance);
+               // 10) URL sources
+               foreach (explode(':', $this->getConfigInstance()->getConfigEntry('crawler_url_stacks')) as $stack) {
+                       // Init task instance
+                       $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_' . $stack . '_task_class');
 
-               // 12) URL source: RSS feed
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_rss_start_task_class');
-               $handlerInstance->registerTask('crawler_url_source_rss_start', $taskInstance);
+                       // And register it
+                       $handlerInstance->registerTask('crawler_url_source_' . $stack, $taskInstance);
+               } // END - foreach
 
-               // 13) URL source: found RSS/ATOM feed
-               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class');
-               $handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance);
-
-               // 14) Uploaded list scanner (checks for wanted files)
+               // 11) Uploaded list scanner (checks for wanted files)
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class');
                $handlerInstance->registerTask('crawler_uploaded_list_scanner', $taskInstance);
-
-               // Put the task handler in registry
-               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
        }
 }
 
index 0b49446e9a5f24f87f35ef71067911bcfaacf1c2..2d0095ec2f74d6338a52ad51b396b9d7994a3260 100644 (file)
@@ -61,6 +61,9 @@ class CruncherTaskHandlerInitializerFilter extends BaseCruncherFilter implements
                // Get a new task handler instance
                $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
 
+               // Put the task handler in registry
+               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
                // Register all tasks:
                //
                // 1) A task for fetching WUs (work units) or test units
@@ -82,9 +85,6 @@ class CruncherTaskHandlerInitializerFilter extends BaseCruncherFilter implements
                 */
                $taskInstance = ObjectFactory::createObjectByConfiguredName('cruncher_key_producer_task_class');
                $handlerInstance->registerTask('cruncher_key_producer', $taskInstance);
-
-               // Put the task handler in registry
-               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
        }
 }
 
index fefaf3c1e0ca4f19010b5e85785ada3714765da3..742b40be42e2781e7aa3628d05c43dade2f865bc 100644 (file)
@@ -61,6 +61,9 @@ class MinerTaskHandlerInitializerFilter extends BaseMinerFilter implements Filte
                // Get a new task handler instance
                $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
 
+               // Put the task handler in registry
+               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
                // Register all tasks:
                //
                // 1) A task for fetching blocks from the network
@@ -82,9 +85,6 @@ class MinerTaskHandlerInitializerFilter extends BaseMinerFilter implements Filte
                 */
                $taskInstance = ObjectFactory::createObjectByConfiguredName('miner_node_communicator_task_class');
                $handlerInstance->registerTask('miner_node_communicator', $taskInstance);
-
-               // Put the task handler in registry
-               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
        }
 }
 
index c497604f8d2a43d7beb81109b88ce74899c31afc..bb5ab1ac4d3c98db9d573104d9809a5814676b47 100644 (file)
@@ -61,6 +61,9 @@ class NodeTaskHandlerInitializerFilter extends BaseNodeFilter implements Filtera
                // Get a new task handler instance
                $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
 
+               // Put the task handler in registry
+               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
                // Prepare a package-tags initialization task for the listeners
                $taskInstance = ObjectFactory::createObjectByConfiguredName('node_package_tags_init_task_class');
 
@@ -142,9 +145,6 @@ class NodeTaskHandlerInitializerFilter extends BaseNodeFilter implements Filtera
                // Register it
                $handlerInstance->registerTask('ping', $taskInstance);
 
-               // Put the task handler in registry
-               Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
-
                /*
                 * Allow extra node-depending tasks, e.g. the bootstrapper node needs
                 * booting its DHT. DHTs are decentralized and are working on
index c2aab32d95e97efd538bbabbb1f5d4b069a0c533..a7f79e98cc8c40bb7547a1a84b778d3bea17d455 100644 (file)
@@ -67,7 +67,7 @@ class TcpProtocolResolver extends BaseProtocolResolver implements ProtocolResolv
                $resultInstance = $nodeInstance->getWrapperInstance()->doSelectByCriteria($searchInstance);
 
                // Is the result valid?
-               if ((!$resultInstance->valid()) || (! $resultInstance->next())) {
+               if ((!$resultInstance->valid()) || (!$resultInstance->next())) {
                        // Node not found in database, this could mean that your database file is damaged.
                        return NULL;
                } // END - if
index c302f17ac58b4cfb90a408121ae19f09adf2a3ab..c4ef084048488663d74e7ed0a1944c339a6618e0 100644 (file)
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 class BaseUrlSource extends BaseSource {
+       // Stack name for all URLs
+       const STACKER_NAME_URLS = 'urls';
+
+       // Array elements for CSV data array
+       const CRAWL_JOB_ARRAY_START_URL      = 'start_url';
+       const CRAWL_JOB_ARRAY_DEPTH          = 'start_depth';
+       const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth';
+
        /**
         * Protected constructor
         *
@@ -53,13 +61,55 @@ class BaseUrlSource extends BaseSource {
         *
         * @return      $isEmpty        Whether the stack 'urls' is empty.
         */
-       protected function isUrlStackEmpty () {
+       public function isUrlStackEmpty () {
                // Determine it
-               $isEmpty = $this->getStackInstance()->isStackEmpty('urls');
+               $isEmpty = $this->getStackInstance()->isStackEmpty(self::STACKER_NAME_URLS);
 
                // Return result
                return $isEmpty;
        }
+
+       /**
+        * Enriches the given associative array with more data. For now, at least 2
+        * elements are required:
+        *
+        * 'start_url'   - Starting URL
+        * 'start_depth' - Crawl depth for starting URL
+        *
+        * @param       $crawlData      Array with partial data for being queued
+        * @return      void
+        * @todo        ~10% done
+        */
+       protected function enrichCrawlerQueueData (array &$crawlData) {
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+
+               // Check for minimum array elements
+               assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL]));
+               assert(isset($crawlData[self::CRAWL_JOB_ARRAY_DEPTH]));
+
+               // @TODO Add more elements
+
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+       }
+
+       /**
+        * Enqueues given crawler array in assigned file-based stack
+        *
+        * @param       $crawlData      Array with partial data for being queued
+        * @return      void
+        */
+       protected function enqueueInFileStack (array $crawlData) {
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+
+               // Get the stack instance and enqueue it
+               $this->getStackInstance()->pushNamed(self::STACKER_NAME_URLS, $crawlData);
+
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+       }
 }
 
 // [EOF]
index d77847c2cd758311ba2e1f95573d7065138721c6..cad4691e7f38f37fcc9459bda498348a8e97d4d1 100644 (file)
@@ -49,18 +49,12 @@ class CrawlerFoundRssUrlSource extends BaseUrlSource implements UrlSource, Regis
        }
 
        /**
-        * Processes entries in the stack.
+        * Fills the URL stack with new entries from source
         *
         * @return      void
-        * @todo        ~10% done
+        * @todo        0% done
         */
-       public function processStack () {
-               // Does the stack have some entries left?
-               if ($this->isUrlStackEmpty()) {
-                       // Nothing to handle here
-                       return;
-               } // END - if
-
+       public function fillUrlStack () {
                $this->partialStub('Please implement this method.');
        }
 }
index a2679ebdb2799bedeebb2b7572c976ac97f1b9cd..fdabe06421aeff9df7b23ee3d0f1493d78b27a3b 100644 (file)
@@ -49,18 +49,12 @@ class CrawlerLocalStartUrlSource extends BaseUrlSource implements UrlSource, Reg
        }
 
        /**
-        * Processes entries in the stack.
+        * Fills the URL stack with new entries from source
         *
         * @return      void
-        * @todo        ~10% done
+        * @todo        0% done
         */
-       public function processStack () {
-               // Does the stack have some entries left?
-               if ($this->isUrlStackEmpty()) {
-                       // Nothing to handle here
-                       return;
-               } // END - if
-
+       public function fillUrlStack () {
                $this->partialStub('Please implement this method.');
        }
 }
index e955d027f2f93209024ea05c060556278b8ad09a..ef6ade1bd02e78eaee16ff28e1bf9c74ad7e5b2d 100644 (file)
@@ -49,18 +49,12 @@ class CrawlerRssStartUrlSource extends BaseUrlSource implements UrlSource, Regis
        }
 
        /**
-        * Processes entries in the stack.
+        * Fills the URL stack with new entries from source
         *
         * @return      void
-        * @todo        ~10% done
+        * @todo        0% done
         */
-       public function processStack () {
-               // Does the stack have some entries left?
-               if ($this->isUrlStackEmpty()) {
-                       // Nothing to handle here
-                       return;
-               } // END - if
-
+       public function fillUrlStack () {
                $this->partialStub('Please implement this method.');
        }
 }
index 3fd94f31d3701b56d4baee362cc3338725810316..4c28ab0c52ef36b0fc49b7c59478c352105b5a06 100644 (file)
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
+       /**
+        * Stack name for a CSV file
+        */
+       const STACK_NAME_CSV_FILE = 'csv_file';
+
+       /**
+        * Stack name for a CSV entry
+        */
+       const STACK_NAME_CSV_ENTRY = 'csv_entry';
+
+       /**
+        * Size of crawl (CSV) entry which is an indexed array:
+        *
+        * 0 = URL to crawl
+        * 1 = Crawl depth of URL
+        * 2 = Crawl depth of linked URLs (same other host only)
+        */
+       const CRAWL_ENTRY_SIZE = 3;
+
        /**
         * "Cached" CSV path
         */
@@ -38,14 +57,14 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
        private $stackSourceInstance = NULL;
 
        /**
-        * Stack name for a CSV file
+        * "Imported" CSV files
         */
-       const STACK_NAME_CSV_FILE = 'csv_file';
+       private $csvFileImported = array();
 
        /**
-        * "Imported" CSV files
+        * "Cached" separator for columns
         */
-       private $csvFileImported = array();
+       private $columnSeparator = '';
 
        /**
         * Protected constructor
@@ -68,8 +87,12 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                // Init stack instance
                $this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_url_source_stack_class');
 
-               // Init stack
+               // Init stacks
                $this->getStackSourceInstance()->initStack(self::STACK_NAME_CSV_FILE);
+               $this->getStackSourceInstance()->initStack(self::STACK_NAME_CSV_ENTRY);
+
+               // "Cache" column separator
+               $this->columnSeparator = $this->getConfigInstance()->getConfigEntry('crawler_url_list_column_separator');
        }
 
        /**
@@ -91,12 +114,12 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                $directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array_merge(array('.htaccess', '.', '..'), $this->csvFileImported));
 
                // Debug message
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
 
                // Is it empty or wrong file extension?
                if ((empty($directoryEntry)) || (substr($directoryEntry, -4, 4) != '.csv')) {
                        // Skip further processing
-                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
+                       //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
                        return FALSE;
                } // END - if
 
@@ -126,17 +149,75 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                return $sourceInstance;
        }
 
+       /**
+        * Enriches and saves the given CSV entry (array) in the assigned
+        * file-based stack. A lot more information is added to such an entry, such
+        * as which files shall be crawled and many more.
+        *
+        * @param       $csvData        Array with data from a CSV file
+        * @return      void
+        */
+       private function saveCsvDataInCrawlerQueue (array $csvData) {
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData()=' . count($csvData) . ' - CALLED!');
+
+               // The array has 3 elements, later enhancements may accept more
+               assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
+
+               /*
+                * First convert the indexed array into an associative array. Don't
+                * forget to expand this array as well when you want to add another
+                * column to the CSV file.
+                */
+               $csvArray = array(
+                       self::CRAWL_JOB_ARRAY_START_URL      => $csvData[0],
+                       self::CRAWL_JOB_ARRAY_DEPTH          => $csvData[1],
+                       self::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2]
+               );
+
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvArray()=' . count($csvArray) . ' - BEFORE!');
+
+               // Then add more data to it
+               $this->enrichCrawlerQueueData($csvArray);
+
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvArray()=' . count($csvArray) . ' - AFTER!');
+
+               /*
+                * Then enqueue it in the file stack. The local crawler "task" will
+                * then pick this up.
+                */
+               $this->enqueueInFileStack($csvArray);
+
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+       }
+
        /**
         * Checks whether a CSV file has been loaded (added to the stack)
         *
-        * @return      $isLoaded       Whether a CSV file has been loaded
+        * @return      $isAdded        Whether a CSV file has been loaded
         */
        private function isCsvFileAdded () {
                // Check whether the stacker is not empty
-               $isLoaded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_FILE)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_FILE)));
+               $isAdded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_FILE)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_FILE)));
+
+               // Return the result
+               return $isAdded;
+       }
+
+       /**
+        * Checks whether a CSV entry has been added to the stack
+        *
+        * @return      $isAdded        Whether a CSV entry has been added
+        */
+       private function isCsvEntryAdded () {
+               // Check whether the stacker is not empty
+               $isAdded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_ENTRY)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_ENTRY)));
 
                // Return the result
-               return $isLoaded;
+               return $isAdded;
        }
 
        /**
@@ -167,20 +248,80 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
        }
 
        /**
-        * Parses the next stacked CSV by reading only one line from it. Then the
-        * read line is being validated and if found good being feed to the next
+        * Parses the next stacked CSV file by reading only one line from it. Then
+        * the read line is validated and, if found good, fed to the next
         * stack. The file is removed from stack only if it has been fully parsed.
         *
         * @return      void
         */
-       private function parseCsvEntry () {
+       private function parseCsvFile () {
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
+               // Get next entry
+               $csvFileInstance = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_FILE);
+
+               // Read full "CSV line"
+               $csvData = $csvFileInstance->readCsvFileLine($this->columnSeparator);
+
                // Debug message
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
+
+               // Always expect an array
+               assert(is_array($csvData));
+
+               // Is the array empty?
+               if (count($csvData) == 0) {
+                       // Debug message
+                       //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: File ' . $csvFileInstance->getFileName() . ' has been fully read.');
+
+                       // Try to close it by actually unsetting (destructing) it
+                       unset($csvFileInstance);
+
+                       // This file has been fully read, so don't push it back on stack.
+                       return;
+               } // END - if
+
+               // A non-empty entry must have 3 elements, later enhancements may accept more
+               assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
+
+               /*
+                * Push the file back on stack as it may contain more entries. This way
+                * all files got rotated on stack which may improve crawler performance.
+                */
+               $this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_FILE, $csvFileInstance);
+
+               // Push array on next stack
+               $this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_ENTRY, $csvData);
 
                // Debug message
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
        }
 
+       /**
+        * Parses the next stacked CSV entry.
+        *
+        * @return      void
+        */
+       private function parseCsvEntry () {
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
+               // Pop it from stack
+               $csvData = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_ENTRY);
+
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
+
+               // It must have 3 elements (see method parseCsvFile() for details)
+               assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
+
+               // Save it in crawler queue (which will enrich it with way more information)
+               $this->saveCsvDataInCrawlerQueue($csvData);
+
+               // Debug message
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+       }
 
        /**
         * Getter for stackSourceInstance variable
@@ -192,19 +333,25 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
        }
 
        /**
-        * Processes entries in the stack.
+        * Fills the URL stack with new entries from source
         *
         * @return      void
-        * @todo        ~20% done
+        * @todo        ~40% done
         */
-       public function processStack () {
+       public function fillUrlStack () {
                // Does the stack have some entries left?
-               if ($this->isCsvFileAdded()) {
+               if ($this->isCsvEntryAdded()) {
                        /*
                         * A CSV file has been found and "imported" (added to stack). Now
                         * the file can be read line by line and checked every one of it.
                         */
                        $this->parseCsvEntry();
+               } elseif ($this->isCsvFileAdded()) {
+                       /*
+                        * A CSV file has been found and "imported" (added to stack). Now
+                        * the file can be read line by line and checked every one of it.
+                        */
+                       $this->parseCsvFile();
                } elseif ($this->isCsvFileFound()) {
                        /*
                         * A file containing an URL list is found. Please note the format is
@@ -212,12 +359,6 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                         * depth, handling of 3rd-party URLs and such.
                         */
                        $this->addCsvFile();
-               } elseif (!$this->isUrlStackEmpty()) {
-                       /*
-                        * Handle next entry. This method will be called very often, so need
-                        * to process more than one entry at a time.
-                        */
-                       $this->processNextEntry();
                }
 
                $this->partialStub('Please implement this method.');
diff --git a/application/hub/main/tasks/crawler/class_BaseUrlSourceTask.php b/application/hub/main/tasks/crawler/class_BaseUrlSourceTask.php
new file mode 100644 (file)
index 0000000..9399ae8
--- /dev/null
@@ -0,0 +1,51 @@
+<?php
+/**
+ * A general URL source Task
+ *
+ * @author             Roland Haeder <webmaster@shipsimu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2014 Hub Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.shipsimu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class BaseUrlSourceTask extends BaseTask {
+       /**
+        * Protected constructor
+        *
+        * @param       $className      Name of the class
+        * @return      void
+        */
+       protected function __construct ($className) {
+               // Call parent constructor
+               parent::__construct($className);
+       }
+
+       /**
+        * Initializes URL source task (to keep the constructor small)
+        *
+        * @return      void
+        */
+       protected function initUrlSourceTask () {
+               // Get source instance
+               $sourceInstance = UrlSourceObjectFactory::createUrlSourceInstance($this);
+
+               // And set it here
+               $this->setUrlSourceInstance($sourceInstance);
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_crawler/.htaccess b/application/hub/main/tasks/crawler/url_crawler/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/url_crawler/local/.htaccess b/application/hub/main/tasks/crawler/url_crawler/local/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php b/application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php
new file mode 100644 (file)
index 0000000..3b7e3c1
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A LocalUrlCrawler task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerLocalUrlCrawlerTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $taskInstance   An instance of a Visitable class
+        */
+       public final static function createCrawlerLocalUrlCrawlerTask () {
+               // Get new instance
+               $taskInstance = new CrawlerLocalUrlCrawlerTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts the visitor to process the visitor
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_crawler/remote/.htaccess b/application/hub/main/tasks/crawler/url_crawler/remote/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php b/application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php
new file mode 100644 (file)
index 0000000..465f1a3
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A RemoteUrlCrawler task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerRemoteUrlCrawlerTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Call parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Creates an instance of this class
+        *
+        * @return      $taskInstance   An instance of a Visitable class
+        */
+       public final static function createCrawlerRemoteUrlCrawlerTask () {
+               // Get new instance
+               $taskInstance = new CrawlerRemoteUrlCrawlerTask();
+
+               // Return the prepared instance
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts the visitor to process the visitor
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Visit this task
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/url_getter/.htaccess b/application/hub/main/tasks/crawler/url_getter/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/crawler/url_getter/local/.htaccess b/application/hub/main/tasks/crawler/url_getter/local/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php b/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php
deleted file mode 100644 (file)
index e3ecc6c..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-<?php
-/**
- * A LocalUrlGetter task for crawlers
- *
- * @author             Roland Haeder <webmaster@ship-simu.org>
- * @version            0.0.0
- * @copyright  Copyright (c) 2014 Crawler Developer Team
- * @license            GNU GPL 3.0 or any newer version
- * @link               http://www.ship-simu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-class CrawlerLocalUrlGetterTask extends BaseTask implements Taskable, Visitable {
-       /**
-        * Protected constructor
-        *
-        * @return      void
-        */
-       protected function __construct () {
-               // Call parent constructor
-               parent::__construct(__CLASS__);
-       }
-
-       /**
-        * Creates an instance of this class
-        *
-        * @return      $taskInstance   An instance of a Visitable class
-        */
-       public final static function createCrawlerLocalUrlGetterTask () {
-               // Get new instance
-               $taskInstance = new CrawlerLocalUrlGetterTask();
-
-               // Return the prepared instance
-               return $taskInstance;
-       }
-
-       /**
-        * Accepts the visitor to process the visitor
-        *
-        * @param       $visitorInstance        An instance of a Visitor class
-        * @return      void
-        * @todo        Maybe visit some sub-objects
-        */
-       public function accept (Visitor $visitorInstance) {
-               // Visit this task
-               $visitorInstance->visitTask($this);
-       }
-
-       /**
-        * Executes the task
-        *
-        * @return      void
-        * @todo        0%
-        */
-       public function executeTask () {
-               $this->partialStub('Unimplemented task.');
-       }
-}
-
-// [EOF]
-?>
diff --git a/application/hub/main/tasks/crawler/url_getter/remote/.htaccess b/application/hub/main/tasks/crawler/url_getter/remote/.htaccess
deleted file mode 100644 (file)
index 3a42882..0000000
+++ /dev/null
@@ -1 +0,0 @@
-Deny from all
diff --git a/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php b/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php
deleted file mode 100644 (file)
index 9dc5b9d..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-<?php
-/**
- * A RemoteUrlGetter task for crawlers
- *
- * @author             Roland Haeder <webmaster@ship-simu.org>
- * @version            0.0.0
- * @copyright  Copyright (c) 2014 Crawler Developer Team
- * @license            GNU GPL 3.0 or any newer version
- * @link               http://www.ship-simu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-class CrawlerRemoteUrlGetterTask extends BaseTask implements Taskable, Visitable {
-       /**
-        * Protected constructor
-        *
-        * @return      void
-        */
-       protected function __construct () {
-               // Call parent constructor
-               parent::__construct(__CLASS__);
-       }
-
-       /**
-        * Creates an instance of this class
-        *
-        * @return      $taskInstance   An instance of a Visitable class
-        */
-       public final static function createCrawlerRemoteUrlGetterTask () {
-               // Get new instance
-               $taskInstance = new CrawlerRemoteUrlGetterTask();
-
-               // Return the prepared instance
-               return $taskInstance;
-       }
-
-       /**
-        * Accepts the visitor to process the visitor
-        *
-        * @param       $visitorInstance        An instance of a Visitor class
-        * @return      void
-        * @todo        Maybe visit some sub-objects
-        */
-       public function accept (Visitor $visitorInstance) {
-               // Visit this task
-               $visitorInstance->visitTask($this);
-       }
-
-       /**
-        * Executes the task
-        *
-        * @return      void
-        * @todo        0%
-        */
-       public function executeTask () {
-               $this->partialStub('Unimplemented task.');
-       }
-}
-
-// [EOF]
-?>
index b62a9df06507024861b7d1c39659581c4fc7a656..284cf5d322486dfdeaa42b1c13e143f0e23d7051 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSource???Task extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSource???Task extends BaseUrlSourceTask implements Taskable, Visitable {
        /**
         * Protected constructor
         *
@@ -63,8 +63,20 @@ class CrawlerUrlSource???Task extends BaseTask implements Taskable, Visitable {
         * @return      void
         */
        public function executeTask () {
-               // Get the URL source instance and announce us
-               UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+               // Get source instance
+               $sourceInstance = $this->getUrlSourceInstance();
+
+               // Is it not set?
+               if (is_null($sourceInstance)) {
+                       // Initialize it
+                       $this->initUrlSourceTask();
+
+                       // And re-get it
+                       $sourceInstance = $this->getUrlSourceInstance();
+               } // END - if
+
+               // Get the URL source instance and fill the stack with crawl entries
+               $sourceInstance->fillUrlStack();
        }
 }
 
index 14236c6ffa6ed537675ecefb1549d905ce3f02eb..823ba33366cb5b217c842896aee0f33b87cc9cc4 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSourceFoundRssTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceFoundRssTask extends BaseUrlSourceTask implements Taskable, Visitable {
        /**
         * Protected constructor
         *
@@ -63,8 +63,20 @@ class CrawlerUrlSourceFoundRssTask extends BaseTask implements Taskable, Visitab
         * @return      void
         */
        public function executeTask () {
-               // Get the URL source instance and announce us
-               UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+               // Get source instance
+               $sourceInstance = $this->getUrlSourceInstance();
+
+               // Is it not set?
+               if (is_null($sourceInstance)) {
+                       // Initialize it
+                       $this->initUrlSourceTask();
+
+                       // And re-get it
+                       $sourceInstance = $this->getUrlSourceInstance();
+               } // END - if
+
+               // Get the URL source instance and fill the stack with crawl entries
+               $sourceInstance->fillUrlStack();
        }
 }
 
index 9fdb71d1dd90054bb62a6ec6cd2631ca51626fca..eb2839eb93f99c1bfde221e10bd393286c66462b 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSourceLocalStartTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceLocalStartTask extends BaseUrlSourceTask implements Taskable, Visitable {
        /**
         * Protected constructor
         *
@@ -63,8 +63,20 @@ class CrawlerUrlSourceLocalStartTask extends BaseTask implements Taskable, Visit
         * @return      void
         */
        public function executeTask () {
-               // Get the URL source instance and announce us
-               UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+               // Get source instance
+               $sourceInstance = $this->getUrlSourceInstance();
+
+               // Is it not set?
+               if (is_null($sourceInstance)) {
+                       // Initialize it
+                       $this->initUrlSourceTask();
+
+                       // And re-get it
+                       $sourceInstance = $this->getUrlSourceInstance();
+               } // END - if
+
+               // Get the URL source instance and fill the stack with crawl entries
+               $sourceInstance->fillUrlStack();
        }
 }
 
index 413c7ad5d83503f293bca55d5af61b07770b55f1..c414ce2bcede3ee372bf0343195dbaf9e619489a 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSourceRssStartTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceRssStartTask extends BaseUrlSourceTask implements Taskable, Visitable {
        /**
         * Protected constructor
         *
@@ -63,8 +63,20 @@ class CrawlerUrlSourceRssStartTask extends BaseTask implements Taskable, Visitab
         * @return      void
         */
        public function executeTask () {
-               // Get the URL source instance and announce us
-               UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+               // Get source instance
+               $sourceInstance = $this->getUrlSourceInstance();
+
+               // Is it not set?
+               if (is_null($sourceInstance)) {
+                       // Initialize it
+                       $this->initUrlSourceTask();
+
+                       // And re-get it
+                       $sourceInstance = $this->getUrlSourceInstance();
+               } // END - if
+
+               // Get the URL source instance and fill the stack with crawl entries
+               $sourceInstance->fillUrlStack();
        }
 }
 
index 7330dda2d886d6b10b421a0e88fc2b54282d5846..50875ab1e5890dde5a01454e528c5a4309484742 100644 (file)
@@ -21,7 +21,7 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-class CrawlerUrlSourceUploadedListTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceUploadedListTask extends BaseUrlSourceTask implements Taskable, Visitable {
        /**
         * Protected constructor
         *
@@ -63,8 +63,20 @@ class CrawlerUrlSourceUploadedListTask extends BaseTask implements Taskable, Vis
         * @return      void
         */
        public function executeTask () {
-               // Get the URL source instance and announce us
-               UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+               // Get source instance
+               $sourceInstance = $this->getUrlSourceInstance();
+
+               // Is it not set?
+               if (is_null($sourceInstance)) {
+                       // Initialize it
+                       $this->initUrlSourceTask();
+
+                       // And re-get it
+                       $sourceInstance = $this->getUrlSourceInstance();
+               } // END - if
+
+               // Get the URL source instance and fill the stack with crawl entries
+               $sourceInstance->fillUrlStack();
        }
 }
 
diff --git a/core b/core
index 4648ede712da5257e96eb4d88f0cc01cb2890740..daff0462a4ebcae895e5946acc0906fbd93618b8 160000 (submodule)
--- a/core
+++ b/core
@@ -1 +1 @@
-Subproject commit 4648ede712da5257e96eb4d88f0cc01cb2890740
+Subproject commit daff0462a4ebcae895e5946acc0906fbd93618b8
index 746e1f2b9289038e3e45ac4d00f61f5f4c0a905f..9f17dbc700c4ae6f6d4f7c8c8a1e3421a40daaf2 100644 (file)
@@ -1,7 +1,7 @@
 ### WARNING: THIS FILE IS AUTO-GENERATED BY ./todo-builder.sh ###
 ### DO NOT EDIT THIS FILE. ###
-./application/hub/config.php:775:// @TODO This and the next value is very static again
-./application/hub/config.php:839:// @TODO This is very static, rewrite it to more flexible
+./application/hub/config.php:772:// @TODO This and the next value is very static again
+./application/hub/config.php:836:// @TODO This is very static, rewrite it to more flexible
 ./application/hub/interfaces/apt-proxy/class_AptProxy.php:10: * @todo          We need to find a better name for this interface
 ./application/hub/interfaces/blocks/class_Minable.php:10: * @todo              We need to find a better name for this interface
 ./application/hub/interfaces/chat/class_Chatter.php:10: * @todo                We need to find a better name for this interface
@@ -15,7 +15,7 @@
 ./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:122:      * @todo        Add minimum/maximum age limitations
 ./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:132:      * @todo        Add timestamp to dataset instance
 ./application/hub/main/chains/class_PackageFilterChain.php:54:  * @todo        This may be slow if a message with a lot tags arrived
-./application/hub/main/class_BaseHubSystem.php:577:                            // @TODO On some systems it is 134, on some 107?
+./application/hub/main/class_BaseHubSystem.php:604:                            // @TODO On some systems it is 134, on some 107?
 ./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:107:        * @todo        Should we add some more filters?
 ./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:58:         * @todo        Try to create a AptProxyActivationTask or so
 ./application/hub/main/commands/console/class_HubConsoleChatCommand.php:107:    * @todo        Should we add some more filters?
 ./application/hub/main/dht/class_BaseDht.php:253:       * @todo        Switch flag 'accept_bootstrap'
 ./application/hub/main/dht/class_BaseDht.php:86:        * @todo        Find more to do here
 ./application/hub/main/dht/node/class_NodeDhtFacade.php:61:     * @todo        Does this data need to be enriched with more meta data?
-./application/hub/main/discovery/protocol/class_ProtocolDiscovery.php:71:              // @TODO Add some validation here???
+./application/hub/main/discovery/protocol/class_ProtocolDiscovery.php:94:              // @TODO Add some validation here???
+./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:115:                    // @TODO Unfinished: $this->getListInstance()->addEntry('unl', $decodedData[NetworkPackage::PACKAGE_DATA_RECIPIENT]);
 ./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:86:      * @todo        Add some validation of recipient field, e.g. an Universal Node Locator is found
-./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:87:      * @todo        The if() does only check for TCP, not UDP, e.g. try to get a $handlerInstance here
+./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:87:      * @todo        Enrich both messages with recipient data
+./application/hub/main/discovery/recipient/socket/class_PackageSocketDiscovery.php:159:                // @TODO FIXME: I don't like these abuse of variables, better strict types
 ./application/hub/main/factories/handler/class_ProtocolHandlerFactory.php:10: * @todo          Unfinished stuff
 ./application/hub/main/factories/socket/class_SocketFactory.php:10: * @todo            Find an interface for hub helper
 ./application/hub/main/filter/apt-proxy/class_AptProxyInitializationFilter.php:54:      * @todo        0% done
 ./application/hub/main/handler/message-types/self-connect/class_NodeMessageSelfConnectHandler.php:71:                  // @TODO Throw an exception here instead of dying
 ./application/hub/main/handler/network/class_BaseRawDataHandler.php:148:        * @todo        This method will be moved to a better place
 ./application/hub/main/handler/network/udp/class_UdpRawDataHandler.php:58:      * @todo        0%
+./application/hub/main/handler/protocol/class_BaseProtocolHandler.php:110:              * @TODO If you know why, please fix and explain it to me.
 ./application/hub/main/handler/tasks/class_TaskHandler.php:139:                // @TODO Messurement can be added around this call
 ./application/hub/main/helper/class_BaseHubSystemHelper.php:87:         * @todo        0% done
-./application/hub/main/helper/connection/class_BaseConnectionHelper.php:204:                   // @TODO Move this to the socket error handler
-./application/hub/main/helper/connection/class_BaseConnectionHelper.php:232:    * @todo        Rewrite the while() loop to a iterator to not let the software stay very long here
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:10: * @todo         Find an interface for hub helper
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:110:                                // @TODO Rewrite this test for UNLs
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:117:                                // @TODO Rewrite this test for UNLs
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:147:         * @todo        We may want to implement a filter for ease notification of other objects like our pool
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:49:  * @todo        $errorCode/-Message are now in handleSocketError()'s call-back methods
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:89:         // @TODO The whole resolving part should be moved out and made more configurable
-./application/hub/main/helper/connection/udp/class_UdpConnectionHelper.php:10: * @todo         Find an interface for hub helper
-./application/hub/main/helper/connection/udp/class_UdpConnectionHelper.php:56:  * @todo        Implement a filter for ease notification of other objects like the pool
+./application/hub/main/helper/connection/ipv4/class_BaseIpV4ConnectionHelper.php:105:   * @todo        Rewrite the while() loop to a iterator to not let the software stay very long here
+./application/hub/main/helper/connection/ipv4/class_BaseIpV4ConnectionHelper.php:10: * @todo           Find an interface for hub helper
+./application/hub/main/helper/connection/ipv4/class_BaseIpV4ConnectionHelper.php:77:                   // @TODO Move this to the socket error handler
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:10: * @todo            Find an interface for hub helper
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:118:                           // @TODO Rewrite this test for UNLs
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:125:                           // @TODO Rewrite this test for UNLs
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:155:    * @todo        We may want to implement a filter for ease notification of other objects like our pool
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:49:     * @todo        $errorCode/-Message are now in handleSocketError()'s call-back methods
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:89:            // @TODO The whole resolving part should be moved out and made more configurable
+./application/hub/main/helper/connection/ipv4/udp/class_UdpConnectionHelper.php:10: * @todo            Find an interface for hub helper
+./application/hub/main/helper/connection/ipv4/udp/class_UdpConnectionHelper.php:56:     * @todo        Implement a filter for ease notification of other objects like the pool
 ./application/hub/main/helper/dht/class_DhtBootstrapHelper.php:10: * @todo             Find an interface for hub helper
 ./application/hub/main/helper/dht/class_DhtPublishEntryHelper.php:10: * @todo          Find an interface for hub helper
 ./application/hub/main/helper/node/announcement/class_NodeAnnouncementHelper.php:10: * @todo           Find an interface for hub helper
 ./application/hub/main/iterator/pool/tasks/class_TaskPoolIterator.php:11: * @todo              latency-based iteration or similar approaches
 ./application/hub/main/listener/tcp/class_TcpListener.php:252:                 // @TODO Does this work on Windozer boxes???
 ./application/hub/main/listener/udp/class_UdpListener.php:153:  * @todo        ~50% done
-./application/hub/main/lists/class_BaseList.php:276:                   // @TODO Extend this somehow?
+./application/hub/main/lists/class_BaseList.php:305:                   // @TODO Extend this somehow?
 ./application/hub/main/lists/groups/class_ListGroupList.php:61:         * @todo        0% done
 ./application/hub/main/miner/chash/class_HubChashMiner.php:108:         * @todo        Implement this method
 ./application/hub/main/miner/chash/class_HubChashMiner.php:138:         * @todo        0% done
 ./application/hub/main/nodes/class_BaseHubNode.php:432:         * @todo        Change the first if() block to check for a specific state
 ./application/hub/main/nodes/class_BaseHubNode.php:638:         * @todo        Add checking if this node has been announced to the sender node
 ./application/hub/main/nodes/class_BaseHubNode.php:658:         * @todo        Add checking if this node has been announced to the sender node
-./application/hub/main/nodes/class_BaseHubNode.php:761:         * @todo        Find more to do here
-./application/hub/main/nodes/class_BaseHubNode.php:774:         * @todo        Handle thrown exception
+./application/hub/main/nodes/class_BaseHubNode.php:763:         * @todo        Find more to do here
+./application/hub/main/nodes/class_BaseHubNode.php:776:         * @todo        Handle thrown exception
 ./application/hub/main/nodes/list/class_HubListNode.php:58:     * @todo        Implement more bootstrap steps
 ./application/hub/main/nodes/list/class_HubListNode.php:79:            // @TODO Add some filters here
 ./application/hub/main/nodes/list/class_HubListNode.php:88:     * @todo        0% done
 ./application/hub/main/nodes/regular/class_HubRegularNode.php:58:       * @todo        Implement this method
 ./application/hub/main/nodes/regular/class_HubRegularNode.php:79:              // @TODO Add some filters here
 ./application/hub/main/nodes/regular/class_HubRegularNode.php:88:       * @todo        0% done
-./application/hub/main/package/class_NetworkPackage.php:1150:   * @todo        This may be enchanced for outgoing packages?
-./application/hub/main/package/class_NetworkPackage.php:1181:           * @todo Unsupported feature of "signed" messages commented out
-./application/hub/main/package/class_NetworkPackage.php:1270:   * @todo        Implement verification of all sent tags here?
+./application/hub/main/package/class_NetworkPackage.php:1167:   * @todo        This may be enchanced for outgoing packages?
+./application/hub/main/package/class_NetworkPackage.php:1198:           * @todo Unsupported feature of "signed" messages commented out
+./application/hub/main/package/class_NetworkPackage.php:1287:   * @todo        Implement verification of all sent tags here?
 ./application/hub/main/package/class_NetworkPackage.php:23: * @todo            Needs to add functionality for handling the object's type
 ./application/hub/main/package/class_NetworkPackage.php:338:           // @TODO md5() is very weak, but it needs to be fast
 ./application/hub/main/package/class_NetworkPackage.php:412:           // @TODO md5() is very weak, but it needs to be fast
-./application/hub/main/package/class_NetworkPackage.php:578:                   // @TODO We may want to do somthing more here?
-./application/hub/main/package/class_NetworkPackage.php:613:    * @todo        Unfinished area, signatures are currently NOT fully supported
+./application/hub/main/package/class_NetworkPackage.php:595:                   // @TODO We may want to do somthing more here?
+./application/hub/main/package/class_NetworkPackage.php:630:    * @todo        Unfinished area, signatures are currently NOT fully supported
 ./application/hub/main/package/fragmenter/class_PackageFragmenter.php:275:      * @todo        Implement a way to send non-announcement packages with extra-salt
 ./application/hub/main/package/fragmenter/class_PackageFragmenter.php:370:             // @TODO This assert broke packages where the hash chunk was very large: assert(strlen($rawData) <= NetworkPackage::TCP_PACKAGE_SIZE);
 ./application/hub/main/package/fragmenter/class_PackageFragmenter.php:441:      * @todo        $helperInstance is unused
 ./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:86:        * @todo        ~5% done
 ./application/hub/main/recipient/dht/class_DhtRecipient.php:76:                        // @TODO Unfinished
 ./application/hub/main/recipient/self/class_SelfRecipient.php:61:              // @TODO Add more checks on data
-./application/hub/main/registry/socket/class_SocketRegistry.php:75:            // @TODO Tested again base class, rewrite it to a generic interface!
 ./application/hub/main/resolver/protocol/tcp/class_TcpProtocolResolver.php:57:  * @todo        0% done
 ./application/hub/main/resolver/state/peer/class_PeerStateResolver.php:59:      * @todo        ~30% done
 ./application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php:52:   * @todo        0% done
-./application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php:55:       * @todo        ~10% done
-./application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php:55:     * @todo        ~10% done
-./application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php:55:       * @todo        ~10% done
-./application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php:196:  * @todo        ~20% done
+./application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php:55:       * @todo        0% done
+./application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php:55:     * @todo        0% done
+./application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php:55:       * @todo        0% done
+./application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php:327:  * @todo        ~40% done
 ./application/hub/main/states/communicator/init/class_CommunicatorInitState.php:60:     * @todo        0% done?
 ./application/hub/main/states/crawler/active/class_CrawlerActiveState.php:60:   * @todo        0% done
 ./application/hub/main/states/crawler/booting/class_CrawlerBootingState.php:60:         * @todo        0% done
 ./application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php:64:        * @todo        0%
 ./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:53:      * @todo        Maybe visit some sub-objects
 ./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:64:      * @todo        0%
-./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:53:   * @todo        Maybe visit some sub-objects
-./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:64:   * @todo        0%
-./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:53:         * @todo        Maybe visit some sub-objects
-./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:64:         * @todo        0%
+./application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php:53:         * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php:64:         * @todo        0%
+./application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php:53:       * @todo        Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php:64:       * @todo        0%
 ./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php:53:      * @todo        Maybe visit some sub-objects
 ./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php:53:    * @todo        Maybe visit some sub-objects
 ./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php:53:      * @todo        Maybe visit some sub-objects
 ./application/hub/main/template/requests/class_XmlRequestNodeListTemplateEngine.php:10: * @todo                This template engine does not make use of setTemplateType()
 ./application/hub/main/template/requests/class_XmlRequestNodeListTemplateEngine.php:74:         * @todo        Find something useful with this!
 ./application/hub/main/tools/class_HubTools.php:158:                   // @TODO ((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])):([0-9]{3,5})
-./application/hub/main/tools/class_HubTools.php:248:                   // @TODO Find a better validation than empty()
-./application/hub/main/tools/class_HubTools.php:276:                   // @TODO Find a better validation than empty()
+./application/hub/main/tools/class_HubTools.php:263:                   // @TODO Find a better validation than empty()
+./application/hub/main/tools/class_HubTools.php:291:                   // @TODO Find a better validation than empty()
 ./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:163:             // @TODO Bad check on UNL, better use a proper validator
 ./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:209:             // @TODO Bad check on UNL, better use a proper validator
 ./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:442:             // @TODO Unimplemented part
 ./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:540:      * @todo        Add timestamp to dataset instance
 ./application/hub/main/wrapper/states/class_PeerStateLookupDatabaseWrapper.php:174:     * @todo        Unfinished area
 ./application/hub/main/wrapper/states/class_PeerStateLookupDatabaseWrapper.php:216:     * @todo        Unfinished area
+./core/inc/classes.php:10: * @todo             Minimize these includes
 ./core/inc/classes/exceptions/main/class_MissingMethodException.php:13: * @todo                Try to rewrite user/guest login classes and mark this exception as deprecated
 ./core/inc/classes/exceptions/main/class_NoConfigEntryException.php:10: * @todo                Rename this class to NoFoundEntryException
 ./core/inc/classes/interfaces/class_FrameworkInterface.php:11: * @todo         Find a better name for this interface
 ./core/inc/classes/interfaces/criteria/extended/class_LocalSearchCriteria.php:30:       * @todo        Find a nice casting here. (int) allows until and including 32766.
 ./core/inc/classes/interfaces/criteria/extended/class_LocalSearchCriteria.php:54:       * @todo        Find a nice casting here. (int) allows until and including 32766.
-./core/inc/classes/main/class_BaseFrameworkSystem.php:1927:     * @todo        Write a logging mechanism for productive mode
-./core/inc/classes/main/class_BaseFrameworkSystem.php:1942:                    // @TODO Finish this part!
-./core/inc/classes/main/class_BaseFrameworkSystem.php:240:     // @todo Try to clean these constants up
-./core/inc/classes/main/class_BaseFrameworkSystem.php:465:             // @TODO __CLASS__ does always return BaseFrameworkSystem but not the extending (=child) class
-./core/inc/classes/main/class_BaseFrameworkSystem.php:539:      * @todo        SearchableResult and UpdateableResult shall have a super interface to use here
+./core/inc/classes/main/class_BaseFrameworkSystem.php:1977:     * @todo        Write a logging mechanism for productive mode
+./core/inc/classes/main/class_BaseFrameworkSystem.php:1992:                    // @TODO Finish this part!
+./core/inc/classes/main/class_BaseFrameworkSystem.php:250:     // @todo Try to clean these constants up
+./core/inc/classes/main/class_BaseFrameworkSystem.php:475:             // @TODO __CLASS__ does always return BaseFrameworkSystem but not the extending (=child) class
+./core/inc/classes/main/class_BaseFrameworkSystem.php:549:      * @todo        SearchableResult and UpdateableResult shall have a super interface to use here
 ./core/inc/classes/main/commands/web/class_WebLoginAreaCommand.php:64:  * @todo        Add some stuff here: Some personal data, app/game related data
 ./core/inc/classes/main/commands/web/class_WebProblemCommand.php:58:    * @todo        0% done
 ./core/inc/classes/main/commands/web/class_WebStatusCommand.php:58:     * @todo        0% done
 ./core/inc/classes/main/controller/web/class_WebStatusController.php:10: * @todo               This controller shall still provide some headlines for sidebars
 ./core/inc/classes/main/criteria/search/class_SearchCriteria.php:102:   * @todo        Find a nice casting here. (int) allows until and including 32766.
 ./core/inc/classes/main/criteria/search/class_SearchCriteria.php:70:    * @todo        Find a nice casting here. (int) allows until and including 32766.
-./core/inc/classes/main/database/databases/class_LocalFileDatabase.php:327:     * @todo        Do some checks on the database directory and files here
-./core/inc/classes/main/database/databases/class_LocalFileDatabase.php:616:     * @todo        Add more generic non-public data for removal
+./core/inc/classes/main/database/backend/class_CachedLocalFileDatabase.php:327:         * @todo        Do some checks on the database directory and files here
+./core/inc/classes/main/database/backend/class_CachedLocalFileDatabase.php:616:         * @todo        Add more generic non-public data for removal
 ./core/inc/classes/main/decorator/template/class_XmlRewriterTemplateDecorator.php:427:  * @todo        Find something useful with this!
 ./core/inc/classes/main/discovery/payment/class_LocalPaymentDiscovery.php:85:   * @todo        0% done
-./core/inc/classes/main/file_directories/class_BaseFileIo.php:162:      * @todo        Handle seekStatus
 ./core/inc/classes/main/file_directories/class_BaseFile.php:135:        * @todo        ~10% done?
 ./core/inc/classes/main/file_directories/class_BaseFile.php:148:        * @todo        Handle seekStatus
+./core/inc/classes/main/file_directories/class_BaseFileIo.php:162:      * @todo        Handle seekStatus
 ./core/inc/classes/main/file_directories/directory/class_FrameworkDirectoryPointer.php:68:      * @todo        Get rid of inConstructor, could be old-lost code.
 ./core/inc/classes/main/file_directories/io_stream/class_FileIoStream.php:270:  * @todo        0% done
 ./core/inc/classes/main/file_directories/io_stream/class_FileIoStream.php:74:   * @todo        This method needs heavy rewrite
 ./core/inc/classes/middleware/compressor/class_CompressorChannel.php:103:                      // @TODO Is there a configurable fall-back compressor needed, or is NullCompressor okay?
 ./core/inc/classes/middleware/debug/class_DebugMiddleware.php:113:                     // @TODO Initialization phase
 ./core/inc/classes/middleware/io/class_FileIoHandler.php:174:   * @todo        0% done
-./core/inc/classes.php:10: * @todo             Minimize these includes
 ./core/inc/classes/third_party/api/wernisportal/class_WernisApi.php:10: * @todo                Out-dated since 0.6-BETA
 ./core/inc/config/class_FrameworkConfiguration.php:115:         * @todo        This method encapsulates a deprecated PHP function and should be deprecated, too.
 ./core/inc/config/class_FrameworkConfiguration.php:223:         * @todo        We have to add some more entries from $_SERVER here
 ./core/inc/loader/class_ClassLoader.php:319:                   /* @TODO: Do not exit here. */
 ./core/inc/output.php:11: * @todo              Minimize these includes
 ./core/inc/selector.php:11: * @todo            Minimize these includes
+./core/index.php:43:    * @todo        This method is old code and needs heavy rewrite and should be moved to ApplicationHelper
 ./index.php:43:         * @todo        This method is old code and needs heavy rewrite and should be moved to ApplicationHelper
 ### ### DEPRECATION FOLLOWS: ### ###
 ./application/hub/main/nodes/class_BaseHubNode.php:46:  * @deprecated
+./core/inc/classes.php:9: * @deprecated
 ./core/inc/classes/exceptions/main/class_MissingMethodException.php:14: * @deprecated  Please do no longer use this exception
 ./core/inc/classes/interfaces/database/backend/class_DatabaseFrontendInterface.php:2:// @DEPRECATED
 ./core/inc/classes/interfaces/database/frontend/class_DatabaseFrontendInterface.php:2:// @DEPRECATED
 ./core/inc/classes/main/database/class_BaseDatabaseFrontend.php:2:// @DEPRECATED
 ./core/inc/classes/main/handler/class_BaseHandler.php:2:// @DEPRECATED
 ./core/inc/classes/main/handler/raw_data/class_BaseRawDataHandler.php:2:// @DEPRECATED
-./core/inc/classes.php:9: * @deprecated
 ./core/inc/database.php:10: * @deprecated
 ./core/inc/hooks.php:2:// @DEPRECATED
 ./core/inc/includes.php:10: * @deprecated