// CFG: CRAWLER-URL-SOURCE-FOUND-RSS-CLASS
$cfg->setConfigEntry('crawler_url_source_found_rss_class', 'CrawlerFoundRssUrlSource');
+// CFG: CRAWLER-URL-STACKS
+$cfg->setConfigEntry('crawler_url_stacks', 'local_start:uploaded_list:rss_start:found_rss');
+
// CFG: CRAWLER-NODE-COMMUNICATOR-TASK-CLASS
$cfg->setConfigEntry('crawler_node_communicator_task_class', 'CrawlerNodeCommunicatorTask');
// CFG: STACKER-CSV-FILE-MAX-SIZE
$cfg->setConfigEntry('stacker_csv_file_max_size', 10);
+// CFG: STACKER-CSV-ENTRY-MAX-SIZE
+$cfg->setConfigEntry('stacker_csv_entry_max_size', 100);
+
// CFG: TASK-CRAWLER-NODE-COMMUNICATOR-STARTUP-DELAY
$cfg->setConfigEntry('task_crawler_node_communicator_startup_delay', 500);
// CFG: TASK-CRAWLER-NODE-COMMUNICATOR-MAX-RUNS
$cfg->setConfigEntry('task_crawler_node_communicator_max_runs', 0);
-// CFG: CRAWLER-LOCAL-URL-GETTER-TASK-CLASS
-$cfg->setConfigEntry('crawler_local_url_getter_task_class', 'CrawlerLocalUrlGetterTask');
+// CFG: CRAWLER-LOCAL-URL-CRAWLER-TASK-CLASS
+$cfg->setConfigEntry('crawler_local_url_crawler_task_class', 'CrawlerLocalUrlCrawlerTask');
-// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-STARTUP-DELAY
-$cfg->setConfigEntry('task_crawler_local_url_getter_startup_delay', 1500);
+// CFG: TASK-CRAWLER-LOCAL-URL-CRAWLER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_local_url_crawler_startup_delay', 1500);
-// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-INTERVAL-DELAY
-$cfg->setConfigEntry('task_crawler_local_url_getter_interval_delay', 200);
+// CFG: TASK-CRAWLER-LOCAL-URL-CRAWLER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_local_url_crawler_interval_delay', 200);
-// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-MAX-RUNS
-$cfg->setConfigEntry('task_crawler_local_url_getter_max_runs', 0);
+// CFG: TASK-CRAWLER-LOCAL-URL-CRAWLER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_local_url_crawler_max_runs', 0);
-// CFG: CRAWLER-REMOTE-URL-GETTER-TASK-CLASS
-$cfg->setConfigEntry('crawler_remote_url_getter_task_class', 'CrawlerRemoteUrlGetterTask');
+// CFG: CRAWLER-REMOTE-URL-CRAWLER-TASK-CLASS
+$cfg->setConfigEntry('crawler_remote_url_crawler_task_class', 'CrawlerRemoteUrlCrawlerTask');
-// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-STARTUP-DELAY
-$cfg->setConfigEntry('task_crawler_remote_url_getter_startup_delay', 1500);
+// CFG: TASK-CRAWLER-REMOTE-URL-CRAWLER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_remote_url_crawler_startup_delay', 1500);
-// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-INTERVAL-DELAY
-$cfg->setConfigEntry('task_crawler_remote_url_getter_interval_delay', 200);
+// CFG: TASK-CRAWLER-REMOTE-URL-CRAWLER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_remote_url_crawler_interval_delay', 200);
-// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-MAX-RUNS
-$cfg->setConfigEntry('task_crawler_remote_url_getter_max_runs', 0);
+// CFG: TASK-CRAWLER-REMOTE-URL-CRAWLER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_remote_url_crawler_max_runs', 0);
// CFG: CRAWLER-REMOTE-JOB-PUBLISHER-TASK-CLASS
$cfg->setConfigEntry('crawler_remote_job_publisher_task_class', 'CrawlerRemoteJobPublisherTask');
// CFG: CRAWLER-CSV-FILE-PATH
$cfg->setConfigEntry('crawler_csv_file_path', 'data/url_lists');
+// CFG: CRAWLER-URL-LIST-COLUMN-SEPARATOR
+$cfg->setConfigEntry('crawler_url_list_column_separator', ',');
+
///////////////////////////////////////////////////////////////////////////////
// HTTP Configuration
///////////////////////////////////////////////////////////////////////////////
*/
interface UrlSource extends Source {
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
*/
- function processStack ();
+ function fillUrlStack ();
+
+ /**
+ * Determines whether the stack 'urls' is empty.
+ *
+ * @return $isEmpty Whether the stack 'urls' is empty.
+ */
+ function isUrlStackEmpty ();
}
// [EOF]
// Get a new task handler instance
$handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
+ // Put the task handler in registry
+ Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
/*
* Register all tasks:
*
*/
$taskInstance = ObjectFactory::createObjectByConfiguredName('apt_proxy_listener_task_class');
$handlerInstance->registerTask('apt_proxy_listener', $taskInstance);
-
- // Put the task handler in registry
- Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
}
}
// Get a new task handler instance
$handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
+ // Put the task handler in registry
+ Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
/*
* Register all tasks:
*
*/
$taskInstance = ObjectFactory::createObjectByConfiguredName('chat_telnet_listener_task_class');
$handlerInstance->registerTask('chat_telnet_listener', $taskInstance);
-
- // Put the task handler in registry
- Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
}
}
// Get a new task handler instance
$handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
+ // Put the task handler in registry
+ Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
/*
* Register all tasks:
*
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class');
$handlerInstance->registerTask('crawler_node_communicator', $taskInstance);
- // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack)
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class');
- $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance);
+ // 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_crawler_task_class');
+ $handlerInstance->registerTask('crawler_local_url_crawler', $taskInstance);
- // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index)
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class');
- $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance);
+ // 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_crawler_task_class');
+ $handlerInstance->registerTask('crawler_remote_url_crawler', $taskInstance);
// 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval)
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
$handlerInstance->registerTask('crawler_ping', $taskInstance);
- // 10) URL source: local start
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_local_start_task_class');
- $handlerInstance->registerTask('crawler_url_source_local_start', $taskInstance);
-
- // 11) URL source: uploaded list
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_uploaded_list_task_class');
- $handlerInstance->registerTask('crawler_url_source_uploaded_list', $taskInstance);
+ // 10) URL sources
+ foreach (explode(':', $this->getConfigInstance()->getConfigEntry('crawler_url_stacks')) as $stack) {
+ // Init task instance
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_' . $stack . '_task_class');
- // 12) URL source: RSS feed
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_rss_start_task_class');
- $handlerInstance->registerTask('crawler_url_source_rss_start', $taskInstance);
+ // And register it
+ $handlerInstance->registerTask('crawler_url_source_' . $stack, $taskInstance);
+ } // END - foreach
- // 13) URL source: found RSS/ATOM feed
- $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class');
- $handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance);
-
- // 14) Uploaded list scanner (checks for wanted files)
+ // 11) Uploaded list scanner (checks for wanted files)
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class');
$handlerInstance->registerTask('crawler_uploaded_list_scanner', $taskInstance);
-
- // Put the task handler in registry
- Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
}
}
// Get a new task handler instance
$handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
+ // Put the task handler in registry
+ Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
// Register all tasks:
//
// 1) A task for fetching WUs (work units) or test units
*/
$taskInstance = ObjectFactory::createObjectByConfiguredName('cruncher_key_producer_task_class');
$handlerInstance->registerTask('cruncher_key_producer', $taskInstance);
-
- // Put the task handler in registry
- Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
}
}
// Get a new task handler instance
$handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
+ // Put the task handler in registry
+ Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
// Register all tasks:
//
// 1) A task for fetching blocks from the network
*/
$taskInstance = ObjectFactory::createObjectByConfiguredName('miner_node_communicator_task_class');
$handlerInstance->registerTask('miner_node_communicator', $taskInstance);
-
- // Put the task handler in registry
- Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
}
}
// Get a new task handler instance
$handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class');
+ // Put the task handler in registry
+ Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
+
// Prepare a package-tags initialization task for the listeners
$taskInstance = ObjectFactory::createObjectByConfiguredName('node_package_tags_init_task_class');
// Register it
$handlerInstance->registerTask('ping', $taskInstance);
- // Put the task handler in registry
- Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
-
/*
* Allow extra node-depending tasks, e.g. the bootstrapper node needs
* booting its DHT. DHTs are decentralized and are working on
$resultInstance = $nodeInstance->getWrapperInstance()->doSelectByCriteria($searchInstance);
// Is the result valid?
- if ((!$resultInstance->valid()) || (! $resultInstance->next())) {
+ if ((!$resultInstance->valid()) || (!$resultInstance->next())) {
// Node not found in database, this could mean that your database file is damaged.
return NULL;
} // END - if
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class BaseUrlSource extends BaseSource {
+ // Stack name for all URLs
+ const STACKER_NAME_URLS = 'urls';
+
+ // Array elements for CSV data array
+ const CRAWL_JOB_ARRAY_START_URL = 'start_url';
+ const CRAWL_JOB_ARRAY_DEPTH = 'start_depth';
+ const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth';
+
/**
* Protected constructor
*
*
* @return $isEmpty Whether the stack 'urls' is empty.
*/
- protected function isUrlStackEmpty () {
+ public function isUrlStackEmpty () {
// Determine it
- $isEmpty = $this->getStackInstance()->isStackEmpty('urls');
+ $isEmpty = $this->getStackInstance()->isStackEmpty(self::STACKER_NAME_URLS);
// Return result
return $isEmpty;
}
+
+ /**
+ * Enriches the given associative array with more data, now at least 2
+ * elements are required:
+ *
+ * 'start_url' - Starting URL
+ * 'start_depth' - Crawl depth for starting URL
+ *
+ * @param $crawlData Array with partial data for being queued
+ * @return void
+ * @todo ~10% done
+ */
+ protected function enrichCrawlerQueueData (array &$crawlData) {
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+
+ /*
+ * Check for minimum array elements: both the start URL and its crawl
+ * depth must be set. These are assertions only, so nothing is verified
+ * here when assertions are disabled.
+ */
+ assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL]));
+ assert(isset($crawlData[self::CRAWL_JOB_ARRAY_DEPTH]));
+
+ /*
+ * @TODO Add more elements. The array is taken by reference, so elements
+ * added here become visible to the caller; currently nothing is added.
+ */
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
+
+ /**
+ * Enqueues given crawler array in assigned file-based stack
+ *
+ * @param $crawlData Array with partial data for being queued
+ * @return void
+ */
+ protected function enqueueInFileStack (array $crawlData) {
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+
+ // Push the crawl data on the stack named by STACKER_NAME_URLS ('urls') of this source's stack instance
+ $this->getStackInstance()->pushNamed(self::STACKER_NAME_URLS, $crawlData);
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
}
// [EOF]
}
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
- * @todo ~10% done
+ * @todo 0% done
*/
- public function processStack () {
- // Does the stack have some entries left?
- if ($this->isUrlStackEmpty()) {
- // Nothing to handle here
- return;
- } // END - if
-
+ public function fillUrlStack () {
$this->partialStub('Please implement this method.');
}
}
}
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
- * @todo ~10% done
+ * @todo 0% done
*/
- public function processStack () {
- // Does the stack have some entries left?
- if ($this->isUrlStackEmpty()) {
- // Nothing to handle here
- return;
- } // END - if
-
+ public function fillUrlStack () {
$this->partialStub('Please implement this method.');
}
}
}
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
- * @todo ~10% done
+ * @todo 0% done
*/
- public function processStack () {
- // Does the stack have some entries left?
- if ($this->isUrlStackEmpty()) {
- // Nothing to handle here
- return;
- } // END - if
-
+ public function fillUrlStack () {
$this->partialStub('Please implement this method.');
}
}
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
+ /**
+ * Stack name for a CSV file
+ */
+ const STACK_NAME_CSV_FILE = 'csv_file';
+
+ /**
+ * Stack name for a CSV entry
+ */
+ const STACK_NAME_CSV_ENTRY = 'csv_entry';
+
+ /**
+ * Size of crawl (CSV) entry which is an indexed array:
+ *
+ * 0 = URL to crawl
+ * 1 = Crawl depth of URL
+ * 2 = Crawl depth of linked URLs (same other host only)
+ */
+ const CRAWL_ENTRY_SIZE = 3;
+
/**
* "Cached" CSV path
*/
private $stackSourceInstance = NULL;
/**
- * Stack name for a CSV file
+ * "Imported" CSV files
*/
- const STACK_NAME_CSV_FILE = 'csv_file';
+ private $csvFileImported = array();
/**
- * "Imported" CSV files
+ * "Cached" separator for columns
*/
- private $csvFileImported = array();
+ private $columnSeparator = '';
/**
* Protected constructor
// Init stack instance
$this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_url_source_stack_class');
- // Init stack
+ // Init stacks
$this->getStackSourceInstance()->initStack(self::STACK_NAME_CSV_FILE);
+ $this->getStackSourceInstance()->initStack(self::STACK_NAME_CSV_ENTRY);
+
+ // "Cache" column separator
+ $this->columnSeparator = $this->getConfigInstance()->getConfigEntry('crawler_url_list_column_separator');
}
/**
$directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array_merge(array('.htaccess', '.', '..'), $this->csvFileImported));
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
// Is it empty or wrong file extension?
if ((empty($directoryEntry)) || (substr($directoryEntry, -4, 4) != '.csv')) {
// Skip further processing
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
return FALSE;
} // END - if
return $sourceInstance;
}
+ /**
+ * Enriches and saves the given CSV entry (array) in the assigned
+ * file-based stack. A lot more information is added to such an entry, such
+ * as which files shall be crawled and many more.
+ *
+ * @param $csvData Array with data from a CSV file
+ * @return void
+ */
+ private function saveCsvDataInCrawlerQueue (array $csvData) {
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData()=' . count($csvData) . ' - CALLED!');
+
+ // The array must have exactly CRAWL_ENTRY_SIZE (3) elements, later enhancements may accept more
+ assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
+
+ /*
+ * First convert the indexed array into an associative array
+ * (0 = start URL, 1 = start depth, 2 = external depth). Don't
+ * forget to expand this array as well when you want to add another
+ * column to the CSV file.
+ */
+ $csvArray = array(
+ self::CRAWL_JOB_ARRAY_START_URL => $csvData[0],
+ self::CRAWL_JOB_ARRAY_DEPTH => $csvData[1],
+ self::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2]
+ );
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvArray()=' . count($csvArray) . ' - BEFORE!');
+
+ // Then add more data to it (the array is handed over by reference)
+ $this->enrichCrawlerQueueData($csvArray);
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvArray()=' . count($csvArray) . ' - AFTER!');
+
+ /*
+ * Then enqueue it in the file stack. The local crawler "task" will
+ * then pick this up.
+ */
+ $this->enqueueInFileStack($csvArray);
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
+
/**
* Checks whether a CSV file has been loaded (added to the stack)
*
- * @return $isLoaded Whether a CSV file has been loaded
+ * @return $isAdded Whether a CSV file has been loaded
*/
private function isCsvFileAdded () {
// Check whether the stacker is not empty
- $isLoaded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_FILE)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_FILE)));
+ $isAdded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_FILE)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_FILE)));
+
+ // Return the result
+ return $isAdded;
+ }
+
+ /**
+ * Checks whether a CSV entry has been added to the stack
+ *
+ * @return $isAdded Whether a CSV entry has been added
+ */
+ private function isCsvEntryAdded () {
+ // Check whether the stacker is not empty
+ $isAdded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_ENTRY)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_ENTRY)));
// Return the result
- return $isLoaded;
+ return $isAdded;
}
/**
}
/**
- * Parses the next stacked CSV by reading only one line from it. Then the
- * read line is being validated and if found good being feed to the next
+ * Parses the next stacked CSV file by reading only one line from it. Then
+ * the read line is validated and, if found good, fed to the next
* stack. The file is removed from stack only if it has been fully parsed.
*
* @return void
*/
- private function parseCsvEntry () {
+	private function parseCsvFile () {
+		// Debug message
+		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
+		// Get next entry
+		$csvFileInstance = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_FILE);
+
+		// Read full "CSV line"
+		$csvData = $csvFileInstance->readCsvFileLine($this->columnSeparator);
+
 		// Debug message
-		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
+
+		// Expect always an array (internal contract of readCsvFileLine())
+		assert(is_array($csvData));
+
+		// Is the array empty?
+		if (count($csvData) == 0) {
+			// Debug message
+			//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: File ' . $csvFileInstance->getFileName() . ' has been fully read.');
+
+			// Try to close it by actually unsetting (destructing) it
+			unset($csvFileInstance);
+
+			// This file has been fully read, so don't push it back on stack.
+			return;
+		} // END - if
+
+		/*
+		 * Push the file back on stack as it may contain more entries. This way
+		 * all files got rotated on stack which may improve crawler performance.
+		 * This is done before validating the line, so a single bad line does
+		 * not drop the rest of the file.
+		 */
+		$this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_FILE, $csvFileInstance);
+
+		/*
+		 * The line comes from an uploaded file, so its column count must be
+		 * checked with a real condition: assert() may be disabled and would
+		 * then let a malformed line (e.g. a truncated one) through to the
+		 * entry stack, where its missing columns are accessed later.
+		 */
+		if (count($csvData) != self::CRAWL_ENTRY_SIZE) {
+			// Report the malformed line and skip it
+			self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: Skipping malformed CSV line with ' . count($csvData) . ' column(s), expected ' . self::CRAWL_ENTRY_SIZE . '.');
+			return;
+		} // END - if
+
+		// Push array on next stack
+		$this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_ENTRY, $csvData);
 		// Debug message
-		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
 	}
+ /**
+ * Parses the next stacked CSV entry.
+ *
+ * @return void
+ */
+ private function parseCsvEntry () {
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
+ // Pop it from stack
+ $csvData = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_ENTRY);
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
+
+ // It must have CRAWL_ENTRY_SIZE (3) elements (see method parseCsvFile() for details)
+ assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
+
+ // Save it in crawler queue (which will enrich it with way more information)
+ $this->saveCsvDataInCrawlerQueue($csvData);
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
/**
* Getter for stackSourceInstance variable
}
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
- * @todo ~20% done
+ * @todo ~40% done
*/
- public function processStack () {
+ public function fillUrlStack () {
// Does the stack have some entries left?
- if ($this->isCsvFileAdded()) {
+ if ($this->isCsvEntryAdded()) {
/*
* A CSV entry has been read from a file and added to the stack. Now
* the entry can be parsed and saved in the crawler's URL queue.
*/
$this->parseCsvEntry();
+ } elseif ($this->isCsvFileAdded()) {
+ /*
+ * A CSV file has been found and "imported" (added to stack). Now
+ * the file can be read line by line and checked every one of it.
+ */
+ $this->parseCsvFile();
} elseif ($this->isCsvFileFound()) {
/*
* A file containing an URL list is found. Please note the format is
* depth, handling of 3rd-party URLs and such.
*/
$this->addCsvFile();
- } elseif (!$this->isUrlStackEmpty()) {
- /*
- * Handle next entry. This method will be called very often, so need
- * to process more than one entry at a time.
- */
- $this->processNextEntry();
}
$this->partialStub('Please implement this method.');
--- /dev/null
+<?php
+/**
+ * A general URL source Task
+ *
+ * @author Roland Haeder <webmaster@shipsimu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2014 Hub Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.shipsimu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class BaseUrlSourceTask extends BaseTask {
+	/**
+	 * Protected constructor, it only forwards the given class name to the
+	 * parent class.
+	 *
+	 * @param	$className	Name of the class
+	 * @return	void
+	 */
+	protected function __construct ($className) {
+		// Let the parent constructor handle the class name
+		parent::__construct($className);
+	}
+
+	/**
+	 * Creates the URL source instance for this task through the
+	 * UrlSourceObjectFactory and stores it in this task. This is kept out
+	 * of the constructor to keep it small.
+	 *
+	 * @return	void
+	 */
+	protected function initUrlSourceTask () {
+		// Create the source instance for this task and remember it right away
+		$this->setUrlSourceInstance(UrlSourceObjectFactory::createUrlSourceInstance($this));
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+Deny from all
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A LocalUrlCrawler task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerLocalUrlCrawlerTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor, use createCrawlerLocalUrlCrawlerTask() to
+	 * obtain an instance.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand the name of this class over to the parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method for this task
+	 *
+	 * @return	$taskInstance	An instance of a Visitable class
+	 */
+	public final static function createCrawlerLocalUrlCrawlerTask () {
+		// Create a fresh instance and return it directly
+		return new self();
+	}
+
+	/**
+	 * Lets the given visitor visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// The visitor only visits the task itself
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task, currently only a stub
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		// Nothing is implemented yet, only report the stub
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A RemoteUrlCrawler task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerRemoteUrlCrawlerTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Protected constructor, use createCrawlerRemoteUrlCrawlerTask() to
+	 * obtain an instance.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand the name of this class over to the parent constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method for this task
+	 *
+	 * @return	$taskInstance	An instance of a Visitable class
+	 */
+	public final static function createCrawlerRemoteUrlCrawlerTask () {
+		// Create a fresh instance and return it directly
+		return new self();
+	}
+
+	/**
+	 * Lets the given visitor visit this task
+	 *
+	 * @param	$visitorInstance	An instance of a Visitor class
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// The visitor only visits the task itself
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task, currently only a stub
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		// Nothing is implemented yet, only report the stub
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
+++ /dev/null
-Deny from all
+++ /dev/null
-Deny from all
+++ /dev/null
-<?php
-/**
- * A LocalUrlGetter task for crawlers
- *
- * @author Roland Haeder <webmaster@ship-simu.org>
- * @version 0.0.0
- * @copyright Copyright (c) 2014 Crawler Developer Team
- * @license GNU GPL 3.0 or any newer version
- * @link http://www.ship-simu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-class CrawlerLocalUrlGetterTask extends BaseTask implements Taskable, Visitable {
- /**
- * Protected constructor
- *
- * @return void
- */
- protected function __construct () {
- // Call parent constructor
- parent::__construct(__CLASS__);
- }
-
- /**
- * Creates an instance of this class
- *
- * @return $taskInstance An instance of a Visitable class
- */
- public final static function createCrawlerLocalUrlGetterTask () {
- // Get new instance
- $taskInstance = new CrawlerLocalUrlGetterTask();
-
- // Return the prepared instance
- return $taskInstance;
- }
-
- /**
- * Accepts the visitor to process the visitor
- *
- * @param $visitorInstance An instance of a Visitor class
- * @return void
- * @todo Maybe visit some sub-objects
- */
- public function accept (Visitor $visitorInstance) {
- // Visit this task
- $visitorInstance->visitTask($this);
- }
-
- /**
- * Executes the task
- *
- * @return void
- * @todo 0%
- */
- public function executeTask () {
- $this->partialStub('Unimplemented task.');
- }
-}
-
-// [EOF]
-?>
+++ /dev/null
-Deny from all
+++ /dev/null
-<?php
-/**
- * A RemoteUrlGetter task for crawlers
- *
- * @author Roland Haeder <webmaster@ship-simu.org>
- * @version 0.0.0
- * @copyright Copyright (c) 2014 Crawler Developer Team
- * @license GNU GPL 3.0 or any newer version
- * @link http://www.ship-simu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-class CrawlerRemoteUrlGetterTask extends BaseTask implements Taskable, Visitable {
- /**
- * Protected constructor
- *
- * @return void
- */
- protected function __construct () {
- // Call parent constructor
- parent::__construct(__CLASS__);
- }
-
- /**
- * Creates an instance of this class
- *
- * @return $taskInstance An instance of a Visitable class
- */
- public final static function createCrawlerRemoteUrlGetterTask () {
- // Get new instance
- $taskInstance = new CrawlerRemoteUrlGetterTask();
-
- // Return the prepared instance
- return $taskInstance;
- }
-
- /**
- * Accepts the visitor to process the visitor
- *
- * @param $visitorInstance An instance of a Visitor class
- * @return void
- * @todo Maybe visit some sub-objects
- */
- public function accept (Visitor $visitorInstance) {
- // Visit this task
- $visitorInstance->visitTask($this);
- }
-
- /**
- * Executes the task
- *
- * @return void
- * @todo 0%
- */
- public function executeTask () {
- $this->partialStub('Unimplemented task.');
- }
-}
-
-// [EOF]
-?>
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSource???Task extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSource???Task extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
* @return void
*/
public function executeTask () {
- // Get the URL source instance and announce us
- UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+ // Get source instance
+ $sourceInstance = $this->getUrlSourceInstance();
+
+ // Is it not set?
+ if (is_null($sourceInstance)) {
+ // Initialize it
+ $this->initUrlSourceTask();
+
+ // And re-get it
+ $sourceInstance = $this->getUrlSourceInstance();
+ } // END - if
+
+ // Get the URL source instance and fill the stack with crawl entries
+ $sourceInstance->fillUrlStack();
}
}
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSourceFoundRssTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceFoundRssTask extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
* @return void
*/
public function executeTask () {
- // Get the URL source instance and announce us
- UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+ // Get source instance
+ $sourceInstance = $this->getUrlSourceInstance();
+
+ // Is it not set?
+ if (is_null($sourceInstance)) {
+ // Initialize it
+ $this->initUrlSourceTask();
+
+ // And re-get it
+ $sourceInstance = $this->getUrlSourceInstance();
+ } // END - if
+
+ // Get the URL source instance and fill the stack with crawl entries
+ $sourceInstance->fillUrlStack();
}
}
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSourceLocalStartTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceLocalStartTask extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
* @return void
*/
public function executeTask () {
- // Get the URL source instance and announce us
- UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+ // Get source instance
+ $sourceInstance = $this->getUrlSourceInstance();
+
+ // Is it not set?
+ if (is_null($sourceInstance)) {
+ // Initialize it
+ $this->initUrlSourceTask();
+
+ // And re-get it
+ $sourceInstance = $this->getUrlSourceInstance();
+ } // END - if
+
+ // Get the URL source instance and fill the stack with crawl entries
+ $sourceInstance->fillUrlStack();
}
}
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSourceRssStartTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceRssStartTask extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
* @return void
*/
public function executeTask () {
- // Get the URL source instance and announce us
- UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+ // Get source instance
+ $sourceInstance = $this->getUrlSourceInstance();
+
+ // Is it not set?
+ if (is_null($sourceInstance)) {
+ // Initialize it
+ $this->initUrlSourceTask();
+
+ // And re-get it
+ $sourceInstance = $this->getUrlSourceInstance();
+ } // END - if
+
+ // Get the URL source instance and fill the stack with crawl entries
+ $sourceInstance->fillUrlStack();
}
}
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSourceUploadedListTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceUploadedListTask extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
* @return void
*/
public function executeTask () {
- // Get the URL source instance and announce us
- UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+ // Get source instance
+ $sourceInstance = $this->getUrlSourceInstance();
+
+ // Is it not set?
+ if (is_null($sourceInstance)) {
+ // Initialize it
+ $this->initUrlSourceTask();
+
+ // And re-get it
+ $sourceInstance = $this->getUrlSourceInstance();
+ } // END - if
+
+ // Get the URL source instance and fill the stack with crawl entries
+ $sourceInstance->fillUrlStack();
}
}
-Subproject commit 4648ede712da5257e96eb4d88f0cc01cb2890740
+Subproject commit daff0462a4ebcae895e5946acc0906fbd93618b8
### WARNING: THIS FILE IS AUTO-GENERATED BY ./todo-builder.sh ###
### DO NOT EDIT THIS FILE. ###
-./application/hub/config.php:775:// @TODO This and the next value is very static again
-./application/hub/config.php:839:// @TODO This is very static, rewrite it to more flexible
+./application/hub/config.php:772:// @TODO This and the next value is very static again
+./application/hub/config.php:836:// @TODO This is very static, rewrite it to more flexible
./application/hub/interfaces/apt-proxy/class_AptProxy.php:10: * @todo We need to find a better name for this interface
./application/hub/interfaces/blocks/class_Minable.php:10: * @todo We need to find a better name for this interface
./application/hub/interfaces/chat/class_Chatter.php:10: * @todo We need to find a better name for this interface
./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:122: * @todo Add minimum/maximum age limitations
./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:132: * @todo Add timestamp to dataset instance
./application/hub/main/chains/class_PackageFilterChain.php:54: * @todo This may be slow if a message with a lot tags arrived
-./application/hub/main/class_BaseHubSystem.php:577: // @TODO On some systems it is 134, on some 107?
+./application/hub/main/class_BaseHubSystem.php:604: // @TODO On some systems it is 134, on some 107?
./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:107: * @todo Should we add some more filters?
./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:58: * @todo Try to create a AptProxyActivationTask or so
./application/hub/main/commands/console/class_HubConsoleChatCommand.php:107: * @todo Should we add some more filters?
./application/hub/main/dht/class_BaseDht.php:253: * @todo Switch flag 'accept_bootstrap'
./application/hub/main/dht/class_BaseDht.php:86: * @todo Find more to do here
./application/hub/main/dht/node/class_NodeDhtFacade.php:61: * @todo Does this data need to be enriched with more meta data?
-./application/hub/main/discovery/protocol/class_ProtocolDiscovery.php:71: // @TODO Add some validation here???
+./application/hub/main/discovery/protocol/class_ProtocolDiscovery.php:94: // @TODO Add some validation here???
+./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:115: // @TODO Unfinished: $this->getListInstance()->addEntry('unl', $decodedData[NetworkPackage::PACKAGE_DATA_RECIPIENT]);
./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:86: * @todo Add some validation of recipient field, e.g. an Universal Node Locator is found
-./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:87: * @todo The if() does only check for TCP, not UDP, e.g. try to get a $handlerInstance here
+./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:87: * @todo Enrich both messages with recipient data
+./application/hub/main/discovery/recipient/socket/class_PackageSocketDiscovery.php:159: // @TODO FIXME: I don't like these abuse of variables, better strict types
./application/hub/main/factories/handler/class_ProtocolHandlerFactory.php:10: * @todo Unfinished stuff
./application/hub/main/factories/socket/class_SocketFactory.php:10: * @todo Find an interface for hub helper
./application/hub/main/filter/apt-proxy/class_AptProxyInitializationFilter.php:54: * @todo 0% done
./application/hub/main/handler/message-types/self-connect/class_NodeMessageSelfConnectHandler.php:71: // @TODO Throw an exception here instead of dying
./application/hub/main/handler/network/class_BaseRawDataHandler.php:148: * @todo This method will be moved to a better place
./application/hub/main/handler/network/udp/class_UdpRawDataHandler.php:58: * @todo 0%
+./application/hub/main/handler/protocol/class_BaseProtocolHandler.php:110: * @TODO If you know why, please fix and explain it to me.
./application/hub/main/handler/tasks/class_TaskHandler.php:139: // @TODO Messurement can be added around this call
./application/hub/main/helper/class_BaseHubSystemHelper.php:87: * @todo 0% done
-./application/hub/main/helper/connection/class_BaseConnectionHelper.php:204: // @TODO Move this to the socket error handler
-./application/hub/main/helper/connection/class_BaseConnectionHelper.php:232: * @todo Rewrite the while() loop to a iterator to not let the software stay very long here
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:10: * @todo Find an interface for hub helper
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:110: // @TODO Rewrite this test for UNLs
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:117: // @TODO Rewrite this test for UNLs
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:147: * @todo We may want to implement a filter for ease notification of other objects like our pool
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:49: * @todo $errorCode/-Message are now in handleSocketError()'s call-back methods
-./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:89: // @TODO The whole resolving part should be moved out and made more configurable
-./application/hub/main/helper/connection/udp/class_UdpConnectionHelper.php:10: * @todo Find an interface for hub helper
-./application/hub/main/helper/connection/udp/class_UdpConnectionHelper.php:56: * @todo Implement a filter for ease notification of other objects like the pool
+./application/hub/main/helper/connection/ipv4/class_BaseIpV4ConnectionHelper.php:105: * @todo Rewrite the while() loop to a iterator to not let the software stay very long here
+./application/hub/main/helper/connection/ipv4/class_BaseIpV4ConnectionHelper.php:10: * @todo Find an interface for hub helper
+./application/hub/main/helper/connection/ipv4/class_BaseIpV4ConnectionHelper.php:77: // @TODO Move this to the socket error handler
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:10: * @todo Find an interface for hub helper
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:118: // @TODO Rewrite this test for UNLs
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:125: // @TODO Rewrite this test for UNLs
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:155: * @todo We may want to implement a filter for ease notification of other objects like our pool
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:49: * @todo $errorCode/-Message are now in handleSocketError()'s call-back methods
+./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:89: // @TODO The whole resolving part should be moved out and made more configurable
+./application/hub/main/helper/connection/ipv4/udp/class_UdpConnectionHelper.php:10: * @todo Find an interface for hub helper
+./application/hub/main/helper/connection/ipv4/udp/class_UdpConnectionHelper.php:56: * @todo Implement a filter for ease notification of other objects like the pool
./application/hub/main/helper/dht/class_DhtBootstrapHelper.php:10: * @todo Find an interface for hub helper
./application/hub/main/helper/dht/class_DhtPublishEntryHelper.php:10: * @todo Find an interface for hub helper
./application/hub/main/helper/node/announcement/class_NodeAnnouncementHelper.php:10: * @todo Find an interface for hub helper
./application/hub/main/iterator/pool/tasks/class_TaskPoolIterator.php:11: * @todo latency-based iteration or similar approaches
./application/hub/main/listener/tcp/class_TcpListener.php:252: // @TODO Does this work on Windozer boxes???
./application/hub/main/listener/udp/class_UdpListener.php:153: * @todo ~50% done
-./application/hub/main/lists/class_BaseList.php:276: // @TODO Extend this somehow?
+./application/hub/main/lists/class_BaseList.php:305: // @TODO Extend this somehow?
./application/hub/main/lists/groups/class_ListGroupList.php:61: * @todo 0% done
./application/hub/main/miner/chash/class_HubChashMiner.php:108: * @todo Implement this method
./application/hub/main/miner/chash/class_HubChashMiner.php:138: * @todo 0% done
./application/hub/main/nodes/class_BaseHubNode.php:432: * @todo Change the first if() block to check for a specific state
./application/hub/main/nodes/class_BaseHubNode.php:638: * @todo Add checking if this node has been announced to the sender node
./application/hub/main/nodes/class_BaseHubNode.php:658: * @todo Add checking if this node has been announced to the sender node
-./application/hub/main/nodes/class_BaseHubNode.php:761: * @todo Find more to do here
-./application/hub/main/nodes/class_BaseHubNode.php:774: * @todo Handle thrown exception
+./application/hub/main/nodes/class_BaseHubNode.php:763: * @todo Find more to do here
+./application/hub/main/nodes/class_BaseHubNode.php:776: * @todo Handle thrown exception
./application/hub/main/nodes/list/class_HubListNode.php:58: * @todo Implement more bootstrap steps
./application/hub/main/nodes/list/class_HubListNode.php:79: // @TODO Add some filters here
./application/hub/main/nodes/list/class_HubListNode.php:88: * @todo 0% done
./application/hub/main/nodes/regular/class_HubRegularNode.php:58: * @todo Implement this method
./application/hub/main/nodes/regular/class_HubRegularNode.php:79: // @TODO Add some filters here
./application/hub/main/nodes/regular/class_HubRegularNode.php:88: * @todo 0% done
-./application/hub/main/package/class_NetworkPackage.php:1150: * @todo This may be enchanced for outgoing packages?
-./application/hub/main/package/class_NetworkPackage.php:1181: * @todo Unsupported feature of "signed" messages commented out
-./application/hub/main/package/class_NetworkPackage.php:1270: * @todo Implement verification of all sent tags here?
+./application/hub/main/package/class_NetworkPackage.php:1167: * @todo This may be enchanced for outgoing packages?
+./application/hub/main/package/class_NetworkPackage.php:1198: * @todo Unsupported feature of "signed" messages commented out
+./application/hub/main/package/class_NetworkPackage.php:1287: * @todo Implement verification of all sent tags here?
./application/hub/main/package/class_NetworkPackage.php:23: * @todo Needs to add functionality for handling the object's type
./application/hub/main/package/class_NetworkPackage.php:338: // @TODO md5() is very weak, but it needs to be fast
./application/hub/main/package/class_NetworkPackage.php:412: // @TODO md5() is very weak, but it needs to be fast
-./application/hub/main/package/class_NetworkPackage.php:578: // @TODO We may want to do somthing more here?
-./application/hub/main/package/class_NetworkPackage.php:613: * @todo Unfinished area, signatures are currently NOT fully supported
+./application/hub/main/package/class_NetworkPackage.php:595: // @TODO We may want to do somthing more here?
+./application/hub/main/package/class_NetworkPackage.php:630: * @todo Unfinished area, signatures are currently NOT fully supported
./application/hub/main/package/fragmenter/class_PackageFragmenter.php:275: * @todo Implement a way to send non-announcement packages with extra-salt
./application/hub/main/package/fragmenter/class_PackageFragmenter.php:370: // @TODO This assert broke packages where the hash chunk was very large: assert(strlen($rawData) <= NetworkPackage::TCP_PACKAGE_SIZE);
./application/hub/main/package/fragmenter/class_PackageFragmenter.php:441: * @todo $helperInstance is unused
./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:86: * @todo ~5% done
./application/hub/main/recipient/dht/class_DhtRecipient.php:76: // @TODO Unfinished
./application/hub/main/recipient/self/class_SelfRecipient.php:61: // @TODO Add more checks on data
-./application/hub/main/registry/socket/class_SocketRegistry.php:75: // @TODO Tested again base class, rewrite it to a generic interface!
./application/hub/main/resolver/protocol/tcp/class_TcpProtocolResolver.php:57: * @todo 0% done
./application/hub/main/resolver/state/peer/class_PeerStateResolver.php:59: * @todo ~30% done
./application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php:52: * @todo 0% done
-./application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php:55: * @todo ~10% done
-./application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php:55: * @todo ~10% done
-./application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php:55: * @todo ~10% done
-./application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php:196: * @todo ~20% done
+./application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php:55: * @todo 0% done
+./application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php:55: * @todo 0% done
+./application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php:55: * @todo 0% done
+./application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php:327: * @todo ~40% done
./application/hub/main/states/communicator/init/class_CommunicatorInitState.php:60: * @todo 0% done?
./application/hub/main/states/crawler/active/class_CrawlerActiveState.php:60: * @todo 0% done
./application/hub/main/states/crawler/booting/class_CrawlerBootingState.php:60: * @todo 0% done
./application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php:64: * @todo 0%
./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:53: * @todo Maybe visit some sub-objects
./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:64: * @todo 0%
-./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:53: * @todo Maybe visit some sub-objects
-./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:64: * @todo 0%
-./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:53: * @todo Maybe visit some sub-objects
-./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:64: * @todo 0%
+./application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php:53: * @todo Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php:64: * @todo 0%
+./application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php:53: * @todo Maybe visit some sub-objects
+./application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php:64: * @todo 0%
./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php:53: * @todo Maybe visit some sub-objects
./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php:53: * @todo Maybe visit some sub-objects
./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php:53: * @todo Maybe visit some sub-objects
./application/hub/main/template/requests/class_XmlRequestNodeListTemplateEngine.php:10: * @todo This template engine does not make use of setTemplateType()
./application/hub/main/template/requests/class_XmlRequestNodeListTemplateEngine.php:74: * @todo Find something useful with this!
./application/hub/main/tools/class_HubTools.php:158: // @TODO ((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])):([0-9]{3,5})
-./application/hub/main/tools/class_HubTools.php:248: // @TODO Find a better validation than empty()
-./application/hub/main/tools/class_HubTools.php:276: // @TODO Find a better validation than empty()
+./application/hub/main/tools/class_HubTools.php:263: // @TODO Find a better validation than empty()
+./application/hub/main/tools/class_HubTools.php:291: // @TODO Find a better validation than empty()
./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:163: // @TODO Bad check on UNL, better use a proper validator
./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:209: // @TODO Bad check on UNL, better use a proper validator
./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:442: // @TODO Unimplemented part
./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:540: * @todo Add timestamp to dataset instance
./application/hub/main/wrapper/states/class_PeerStateLookupDatabaseWrapper.php:174: * @todo Unfinished area
./application/hub/main/wrapper/states/class_PeerStateLookupDatabaseWrapper.php:216: * @todo Unfinished area
+./core/inc/classes.php:10: * @todo Minimize these includes
./core/inc/classes/exceptions/main/class_MissingMethodException.php:13: * @todo Try to rewrite user/guest login classes and mark this exception as deprecated
./core/inc/classes/exceptions/main/class_NoConfigEntryException.php:10: * @todo Rename this class to NoFoundEntryException
./core/inc/classes/interfaces/class_FrameworkInterface.php:11: * @todo Find a better name for this interface
./core/inc/classes/interfaces/criteria/extended/class_LocalSearchCriteria.php:30: * @todo Find a nice casting here. (int) allows until and including 32766.
./core/inc/classes/interfaces/criteria/extended/class_LocalSearchCriteria.php:54: * @todo Find a nice casting here. (int) allows until and including 32766.
-./core/inc/classes/main/class_BaseFrameworkSystem.php:1927: * @todo Write a logging mechanism for productive mode
-./core/inc/classes/main/class_BaseFrameworkSystem.php:1942: // @TODO Finish this part!
-./core/inc/classes/main/class_BaseFrameworkSystem.php:240: // @todo Try to clean these constants up
-./core/inc/classes/main/class_BaseFrameworkSystem.php:465: // @TODO __CLASS__ does always return BaseFrameworkSystem but not the extending (=child) class
-./core/inc/classes/main/class_BaseFrameworkSystem.php:539: * @todo SearchableResult and UpdateableResult shall have a super interface to use here
+./core/inc/classes/main/class_BaseFrameworkSystem.php:1977: * @todo Write a logging mechanism for productive mode
+./core/inc/classes/main/class_BaseFrameworkSystem.php:1992: // @TODO Finish this part!
+./core/inc/classes/main/class_BaseFrameworkSystem.php:250: // @todo Try to clean these constants up
+./core/inc/classes/main/class_BaseFrameworkSystem.php:475: // @TODO __CLASS__ does always return BaseFrameworkSystem but not the extending (=child) class
+./core/inc/classes/main/class_BaseFrameworkSystem.php:549: * @todo SearchableResult and UpdateableResult shall have a super interface to use here
./core/inc/classes/main/commands/web/class_WebLoginAreaCommand.php:64: * @todo Add some stuff here: Some personal data, app/game related data
./core/inc/classes/main/commands/web/class_WebProblemCommand.php:58: * @todo 0% done
./core/inc/classes/main/commands/web/class_WebStatusCommand.php:58: * @todo 0% done
./core/inc/classes/main/controller/web/class_WebStatusController.php:10: * @todo This controller shall still provide some headlines for sidebars
./core/inc/classes/main/criteria/search/class_SearchCriteria.php:102: * @todo Find a nice casting here. (int) allows until and including 32766.
./core/inc/classes/main/criteria/search/class_SearchCriteria.php:70: * @todo Find a nice casting here. (int) allows until and including 32766.
-./core/inc/classes/main/database/databases/class_LocalFileDatabase.php:327: * @todo Do some checks on the database directory and files here
-./core/inc/classes/main/database/databases/class_LocalFileDatabase.php:616: * @todo Add more generic non-public data for removal
+./core/inc/classes/main/database/backend/class_CachedLocalFileDatabase.php:327: * @todo Do some checks on the database directory and files here
+./core/inc/classes/main/database/backend/class_CachedLocalFileDatabase.php:616: * @todo Add more generic non-public data for removal
./core/inc/classes/main/decorator/template/class_XmlRewriterTemplateDecorator.php:427: * @todo Find something useful with this!
./core/inc/classes/main/discovery/payment/class_LocalPaymentDiscovery.php:85: * @todo 0% done
-./core/inc/classes/main/file_directories/class_BaseFileIo.php:162: * @todo Handle seekStatus
./core/inc/classes/main/file_directories/class_BaseFile.php:135: * @todo ~10% done?
./core/inc/classes/main/file_directories/class_BaseFile.php:148: * @todo Handle seekStatus
+./core/inc/classes/main/file_directories/class_BaseFileIo.php:162: * @todo Handle seekStatus
./core/inc/classes/main/file_directories/directory/class_FrameworkDirectoryPointer.php:68: * @todo Get rid of inConstructor, could be old-lost code.
./core/inc/classes/main/file_directories/io_stream/class_FileIoStream.php:270: * @todo 0% done
./core/inc/classes/main/file_directories/io_stream/class_FileIoStream.php:74: * @todo This method needs heavy rewrite
./core/inc/classes/middleware/compressor/class_CompressorChannel.php:103: // @TODO Is there a configurable fall-back compressor needed, or is NullCompressor okay?
./core/inc/classes/middleware/debug/class_DebugMiddleware.php:113: // @TODO Initialization phase
./core/inc/classes/middleware/io/class_FileIoHandler.php:174: * @todo 0% done
-./core/inc/classes.php:10: * @todo Minimize these includes
./core/inc/classes/third_party/api/wernisportal/class_WernisApi.php:10: * @todo Out-dated since 0.6-BETA
./core/inc/config/class_FrameworkConfiguration.php:115: * @todo This method encapsulates a deprecated PHP function and should be deprecated, too.
./core/inc/config/class_FrameworkConfiguration.php:223: * @todo We have to add some more entries from $_SERVER here
./core/inc/loader/class_ClassLoader.php:319: /* @TODO: Do not exit here. */
./core/inc/output.php:11: * @todo Minimize these includes
./core/inc/selector.php:11: * @todo Minimize these includes
+./core/index.php:43: * @todo This method is old code and needs heavy rewrite and should be moved to ApplicationHelper
./index.php:43: * @todo This method is old code and needs heavy rewrite and should be moved to ApplicationHelper
### ### DEPRECATION FOLLOWS: ### ###
./application/hub/main/nodes/class_BaseHubNode.php:46: * @deprecated
+./core/inc/classes.php:9: * @deprecated
./core/inc/classes/exceptions/main/class_MissingMethodException.php:14: * @deprecated Please do no longer use this exception
./core/inc/classes/interfaces/database/backend/class_DatabaseFrontendInterface.php:2:// @DEPRECATED
./core/inc/classes/interfaces/database/frontend/class_DatabaseFrontendInterface.php:2:// @DEPRECATED
./core/inc/classes/main/database/class_BaseDatabaseFrontend.php:2:// @DEPRECATED
./core/inc/classes/main/handler/class_BaseHandler.php:2:// @DEPRECATED
./core/inc/classes/main/handler/raw_data/class_BaseRawDataHandler.php:2:// @DEPRECATED
-./core/inc/classes.php:9: * @deprecated
./core/inc/database.php:10: * @deprecated
./core/inc/hooks.php:2:// @DEPRECATED
./core/inc/includes.php:10: * @deprecated