From: Roland Haeder Date: Tue, 17 Mar 2015 21:24:00 +0000 (+0100) Subject: Merge branch 'refacuring/protocol_handler' into latest-core/crawler X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=3e80b362d56cce84b1b33e66f13dba51e4cc9606;hp=5061be761f4d0c2bb9b1943d19f8ad55ff7170dd;p=hub.git Merge branch 'refacuring/protocol_handler' into latest-core/crawler Signed-off-by: Roland Haeder --- diff --git a/application/hub/config.php b/application/hub/config.php index 8130471e5..c516e96c4 100644 --- a/application/hub/config.php +++ b/application/hub/config.php @@ -1280,6 +1280,9 @@ $cfg->setConfigEntry('crawler_url_source_rss_start_class', 'CrawlerRssStartUrlSo // CFG: CRAWLER-URL-SOURCE-FOUND-RSS-CLASS $cfg->setConfigEntry('crawler_url_source_found_rss_class', 'CrawlerFoundRssUrlSource'); +// CFG: CRAWLER-URL-STACKS +$cfg->setConfigEntry('crawler_url_stacks', 'local_start:uploaded_list:rss_start:found_rss'); + // CFG: CRAWLER-NODE-COMMUNICATOR-TASK-CLASS $cfg->setConfigEntry('crawler_node_communicator_task_class', 'CrawlerNodeCommunicatorTask'); @@ -1313,6 +1316,9 @@ $cfg->setConfigEntry('crawler_uploaded_list_url_source_stack_class', 'FiFoStacke // CFG: STACKER-CSV-FILE-MAX-SIZE $cfg->setConfigEntry('stacker_csv_file_max_size', 10); +// CFG: STACKER-CSV-ENTRY-MAX-SIZE +$cfg->setConfigEntry('stacker_csv_entry_max_size', 100); + // CFG: TASK-CRAWLER-NODE-COMMUNICATOR-STARTUP-DELAY $cfg->setConfigEntry('task_crawler_node_communicator_startup_delay', 500); @@ -1322,29 +1328,29 @@ $cfg->setConfigEntry('task_crawler_node_communicator_interval_delay', 250); // CFG: TASK-CRAWLER-NODE-COMMUNICATOR-MAX-RUNS $cfg->setConfigEntry('task_crawler_node_communicator_max_runs', 0); -// CFG: CRAWLER-LOCAL-URL-GETTER-TASK-CLASS -$cfg->setConfigEntry('crawler_local_url_getter_task_class', 'CrawlerLocalUrlGetterTask'); +// CFG: CRAWLER-LOCAL-URL-CRAWLER-TASK-CLASS +$cfg->setConfigEntry('crawler_local_url_crawler_task_class', 'CrawlerLocalUrlCrawlerTask'); -// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-STARTUP-DELAY -$cfg->setConfigEntry('task_crawler_local_url_getter_startup_delay', 1500); +// CFG: TASK-CRAWLER-LOCAL-URL-CRAWLER-STARTUP-DELAY +$cfg->setConfigEntry('task_crawler_local_url_crawler_startup_delay', 1500); -// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-INTERVAL-DELAY -$cfg->setConfigEntry('task_crawler_local_url_getter_interval_delay', 200); +// CFG: TASK-CRAWLER-LOCAL-URL-CRAWLER-INTERVAL-DELAY +$cfg->setConfigEntry('task_crawler_local_url_crawler_interval_delay', 200); -// CFG: TASK-CRAWLER-LOCAL-URL-GETTER-MAX-RUNS -$cfg->setConfigEntry('task_crawler_local_url_getter_max_runs', 0); +// CFG: TASK-CRAWLER-LOCAL-URL-CRAWLER-MAX-RUNS +$cfg->setConfigEntry('task_crawler_local_url_crawler_max_runs', 0); -// CFG: CRAWLER-REMOTE-URL-GETTER-TASK-CLASS -$cfg->setConfigEntry('crawler_remote_url_getter_task_class', 'CrawlerRemoteUrlGetterTask'); +// CFG: CRAWLER-REMOTE-URL-CRAWLER-TASK-CLASS +$cfg->setConfigEntry('crawler_remote_url_crawler_task_class', 'CrawlerRemoteUrlCrawlerTask'); -// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-STARTUP-DELAY -$cfg->setConfigEntry('task_crawler_remote_url_getter_startup_delay', 1500); +// CFG: TASK-CRAWLER-REMOTE-URL-CRAWLER-STARTUP-DELAY +$cfg->setConfigEntry('task_crawler_remote_url_crawler_startup_delay', 1500); -// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-INTERVAL-DELAY -$cfg->setConfigEntry('task_crawler_remote_url_getter_interval_delay', 200); +// CFG: TASK-CRAWLER-REMOTE-URL-CRAWLER-INTERVAL-DELAY +$cfg->setConfigEntry('task_crawler_remote_url_crawler_interval_delay', 200); -// CFG: TASK-CRAWLER-REMOTE-URL-GETTER-MAX-RUNS -$cfg->setConfigEntry('task_crawler_remote_url_getter_max_runs', 0); +// CFG: TASK-CRAWLER-REMOTE-URL-CRAWLER-MAX-RUNS +$cfg->setConfigEntry('task_crawler_remote_url_crawler_max_runs', 0); // CFG: CRAWLER-REMOTE-JOB-PUBLISHER-TASK-CLASS $cfg->setConfigEntry('crawler_remote_job_publisher_task_class', 'CrawlerRemoteJobPublisherTask'); @@ -1481,6 +1487,9 @@ $cfg->setConfigEntry('task_crawler_uploaded_list_scanner_max_runs', 0); // CFG: CRAWLER-CSV-FILE-PATH $cfg->setConfigEntry('crawler_csv_file_path', 'data/url_lists'); +// CFG: CRAWLER-URL-LIST-COLUMN-SEPARATOR +$cfg->setConfigEntry('crawler_url_list_column_separator', ','); + /////////////////////////////////////////////////////////////////////////////// // HTTP Configuration /////////////////////////////////////////////////////////////////////////////// diff --git a/application/hub/interfaces/source/urls/class_UrlSource.php b/application/hub/interfaces/source/urls/class_UrlSource.php index 996eb52b5..8f30d0a5f 100644 --- a/application/hub/interfaces/source/urls/class_UrlSource.php +++ b/application/hub/interfaces/source/urls/class_UrlSource.php @@ -23,11 +23,18 @@ */ interface UrlSource extends Source { /** - * Processes entries in the stack. + * Fills the URL stack with new entries from source * * @return void */ - function processStack (); + function fillUrlStack (); + + /** + * Determines whether the stack 'urls' is empty. + * + * @return $isEmpty Whether the stack 'urls' is empty. + */ + function isUrlStackEmpty (); } // [EOF] diff --git a/application/hub/main/filter/task/apt-proxy/class_AptProxyTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/apt-proxy/class_AptProxyTaskHandlerInitializerFilter.php index a39575967..17e808a62 100644 --- a/application/hub/main/filter/task/apt-proxy/class_AptProxyTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/apt-proxy/class_AptProxyTaskHandlerInitializerFilter.php @@ -61,6 +61,9 @@ class AptProxyTaskHandlerInitializerFilter extends BaseAptProxyFilter implements // Get a new task handler instance $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class'); + // Put the task handler in registry + Registry::getRegistry()->addInstance('task_handler', $handlerInstance); + /* * Register all tasks: * @@ -68,9 +71,6 @@ class AptProxyTaskHandlerInitializerFilter extends BaseAptProxyFilter implements */ $taskInstance = ObjectFactory::createObjectByConfiguredName('apt_proxy_listener_task_class'); $handlerInstance->registerTask('apt_proxy_listener', $taskInstance); - - // Put the task handler in registry - Registry::getRegistry()->addInstance('task_handler', $handlerInstance); } } diff --git a/application/hub/main/filter/task/chat/class_ChatTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/chat/class_ChatTaskHandlerInitializerFilter.php index 178f70743..85db533fb 100644 --- a/application/hub/main/filter/task/chat/class_ChatTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/chat/class_ChatTaskHandlerInitializerFilter.php @@ -61,6 +61,9 @@ class ChatTaskHandlerInitializerFilter extends BaseChatFilter implements Filtera // Get a new task handler instance $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class'); + // Put the task handler in registry + Registry::getRegistry()->addInstance('task_handler', $handlerInstance); + /* * Register all tasks: * @@ -68,9 +71,6 @@ class ChatTaskHandlerInitializerFilter extends BaseChatFilter implements Filtera */ $taskInstance = ObjectFactory::createObjectByConfiguredName('chat_telnet_listener_task_class'); $handlerInstance->registerTask('chat_telnet_listener', $taskInstance); - - // Put the task handler in registry - Registry::getRegistry()->addInstance('task_handler', $handlerInstance); } } diff --git a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php index 46a84ffef..1a5ee9eb7 100644 --- a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php @@ -61,6 +61,9 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F // Get a new task handler instance $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class'); + // Put the task handler in registry + Registry::getRegistry()->addInstance('task_handler', $handlerInstance); + /* * Register all tasks: * @@ -69,13 +72,13 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_node_communicator_task_class'); $handlerInstance->registerTask('crawler_node_communicator', $taskInstance); - // 2) Local URL getter (gets URLs locally and adds them to the analyzer's input stack) - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_getter_task_class'); - $handlerInstance->registerTask('crawler_local_url_getter', $taskInstance); + // 2) Local URL crawler (gets URLs locally and adds them to the analyzer's input stack) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_local_url_crawler_task_class'); + $handlerInstance->registerTask('crawler_local_url_crawler', $taskInstance); - // 3) Remote URL getter (gets URLs locally for other nodes, also includes the crawled URL in local index) - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_getter_task_class'); - $handlerInstance->registerTask('crawler_remote_url_getter', $taskInstance); + // 3) Remote URL crawler (gets URLs locally for other nodes, also includes the crawled URL in local index) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_url_crawler_task_class'); + $handlerInstance->registerTask('crawler_remote_url_crawler', $taskInstance); // 4) Remote-crawl publisher (publishes crawl jobs for remote retrieval) $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class'); @@ -101,28 +104,18 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class'); $handlerInstance->registerTask('crawler_ping', $taskInstance); - // 10) URL source: local start - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_local_start_task_class'); - $handlerInstance->registerTask('crawler_url_source_local_start', $taskInstance); - - // 11) URL source: uploaded list - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_uploaded_list_task_class'); - $handlerInstance->registerTask('crawler_url_source_uploaded_list', $taskInstance); + // 10) URL sources + foreach (explode(':', $this->getConfigInstance()->getConfigEntry('crawler_url_stacks')) as $stack) { + // Init task instance + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_' . $stack . '_task_class'); - // 12) URL source: RSS feed - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_rss_start_task_class'); - $handlerInstance->registerTask('crawler_url_source_rss_start', $taskInstance); + // And register it + $handlerInstance->registerTask('crawler_url_source_' . $stack, $taskInstance); + } // END - foreach - // 13) URL source: found RSS/ATOM feed - $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class'); - $handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance); - - // 14) Uploaded list scanner (checks for wanted files) + // 11) Uploaded list scanner (checks for wanted files) $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class'); $handlerInstance->registerTask('crawler_uploaded_list_scanner', $taskInstance); - - // Put the task handler in registry - Registry::getRegistry()->addInstance('task_handler', $handlerInstance); } } diff --git a/application/hub/main/filter/task/cruncher/class_CruncherTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/cruncher/class_CruncherTaskHandlerInitializerFilter.php index 0b49446e9..2d0095ec2 100644 --- a/application/hub/main/filter/task/cruncher/class_CruncherTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/cruncher/class_CruncherTaskHandlerInitializerFilter.php @@ -61,6 +61,9 @@ class CruncherTaskHandlerInitializerFilter extends BaseCruncherFilter implements // Get a new task handler instance $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class'); + // Put the task handler in registry + Registry::getRegistry()->addInstance('task_handler', $handlerInstance); + // Register all tasks: // // 1) A task for fetching WUs (work units) or test units @@ -82,9 +85,6 @@ class CruncherTaskHandlerInitializerFilter extends BaseCruncherFilter implements */ $taskInstance = ObjectFactory::createObjectByConfiguredName('cruncher_key_producer_task_class'); $handlerInstance->registerTask('cruncher_key_producer', $taskInstance); - - // Put the task handler in registry - Registry::getRegistry()->addInstance('task_handler', $handlerInstance); } } diff --git a/application/hub/main/filter/task/miner/class_MinerTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/miner/class_MinerTaskHandlerInitializerFilter.php index fefaf3c1e..742b40be4 100644 --- a/application/hub/main/filter/task/miner/class_MinerTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/miner/class_MinerTaskHandlerInitializerFilter.php @@ -61,6 +61,9 @@ class MinerTaskHandlerInitializerFilter extends BaseMinerFilter implements Filte // Get a new task handler instance $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class'); + // Put the task handler in registry + Registry::getRegistry()->addInstance('task_handler', $handlerInstance); + // Register all tasks: // // 1) A task for fetching blocks from the network @@ -82,9 +85,6 @@ class MinerTaskHandlerInitializerFilter extends BaseMinerFilter implements Filte */ $taskInstance = ObjectFactory::createObjectByConfiguredName('miner_node_communicator_task_class'); $handlerInstance->registerTask('miner_node_communicator', $taskInstance); - - // Put the task handler in registry - Registry::getRegistry()->addInstance('task_handler', $handlerInstance); } } diff --git a/application/hub/main/filter/task/node/class_NodeTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/node/class_NodeTaskHandlerInitializerFilter.php index c497604f8..bb5ab1ac4 100644 --- a/application/hub/main/filter/task/node/class_NodeTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/node/class_NodeTaskHandlerInitializerFilter.php @@ -61,6 +61,9 @@ class NodeTaskHandlerInitializerFilter extends BaseNodeFilter implements Filtera // Get a new task handler instance $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class'); + // Put the task handler in registry + Registry::getRegistry()->addInstance('task_handler', $handlerInstance); + // Prepare a package-tags initialization task for the listeners $taskInstance = ObjectFactory::createObjectByConfiguredName('node_package_tags_init_task_class'); @@ -142,9 +145,6 @@ class NodeTaskHandlerInitializerFilter extends BaseNodeFilter implements Filtera // Register it $handlerInstance->registerTask('ping', $taskInstance); - // Put the task handler in registry - Registry::getRegistry()->addInstance('task_handler', $handlerInstance); - /* * Allow extra node-depending tasks, e.g. the bootstrapper node needs * booting its DHT. DHTs are decentralized and are working on diff --git a/application/hub/main/resolver/protocol/tcp/class_TcpProtocolResolver.php b/application/hub/main/resolver/protocol/tcp/class_TcpProtocolResolver.php index c2aab32d9..a7f79e98c 100644 --- a/application/hub/main/resolver/protocol/tcp/class_TcpProtocolResolver.php +++ b/application/hub/main/resolver/protocol/tcp/class_TcpProtocolResolver.php @@ -67,7 +67,7 @@ class TcpProtocolResolver extends BaseProtocolResolver implements ProtocolResolv $resultInstance = $nodeInstance->getWrapperInstance()->doSelectByCriteria($searchInstance); // Is the result valid? - if ((!$resultInstance->valid()) || (! $resultInstance->next())) { + if ((!$resultInstance->valid()) || (!$resultInstance->next())) { // Node not found in database, this could mean that your database file is damaged. return NULL; } // END - if diff --git a/application/hub/main/source/class_BaseUrlSource.php b/application/hub/main/source/class_BaseUrlSource.php index c302f17ac..c4ef08404 100644 --- a/application/hub/main/source/class_BaseUrlSource.php +++ b/application/hub/main/source/class_BaseUrlSource.php @@ -22,6 +22,14 @@ * along with this program. If not, see . */ class BaseUrlSource extends BaseSource { + // Stack name for all URLs + const STACKER_NAME_URLS = 'urls'; + + // Array elements for CSV data array + const CRAWL_JOB_ARRAY_START_URL = 'start_url'; + const CRAWL_JOB_ARRAY_DEPTH = 'start_depth'; + const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth'; + /** * Protected constructor * @@ -53,13 +61,55 @@ class BaseUrlSource extends BaseSource { * * @return $isEmpty Whether the stack 'urls' is empty. */ - protected function isUrlStackEmpty () { + public function isUrlStackEmpty () { // Determine it - $isEmpty = $this->getStackInstance()->isStackEmpty('urls'); + $isEmpty = $this->getStackInstance()->isStackEmpty(self::STACKER_NAME_URLS); // Return result return $isEmpty; } + + /** + * Enriches the given associative array with more data, now at least 2 + * elements are required: + * + * 'start_url' - Starting URL + * 'start_depth' - Crawl depth for starting URL + * + * @param $crawlData Array with partial data for being queued + * @return void + * @todo ~10% done + */ + protected function enrichCrawlerQueueData (array &$crawlData) { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!'); + + // Check for minimum array elements + assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL])); + assert(isset($crawlData[self::CRAWL_JOB_ARRAY_DEPTH])); + + // @TODO Add more elements + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!'); + } + + /** + * Enqueues given crawler array in assigned file-based stack + * + * @param $crawlData Array with partial data for being queued + * @return void + */ + protected function enqueueInFileStack (array $crawlData) { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!'); + + // Get the stack instance and enqueue it + $this->getStackInstance()->pushNamed(self::STACKER_NAME_URLS, $crawlData); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!'); + } } // [EOF] diff --git a/application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php b/application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php index d77847c2c..cad4691e7 100644 --- a/application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php +++ b/application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php @@ -49,18 +49,12 @@ class CrawlerFoundRssUrlSource extends BaseUrlSource implements UrlSource, Regis } /** - * Processes entries in the stack. + * Fills the URL stack with new entries from source * * @return void - * @todo ~10% done + * @todo 0% done */ - public function processStack () { - // Does the stack have some entries left? - if ($this->isUrlStackEmpty()) { - // Nothing to handle here - return; - } // END - if - + public function fillUrlStack () { $this->partialStub('Please implement this method.'); } } diff --git a/application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php b/application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php index a2679ebdb..fdabe0642 100644 --- a/application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php +++ b/application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php @@ -49,18 +49,12 @@ class CrawlerLocalStartUrlSource extends BaseUrlSource implements UrlSource, Reg } /** - * Processes entries in the stack. + * Fills the URL stack with new entries from source * * @return void - * @todo ~10% done + * @todo 0% done */ - public function processStack () { - // Does the stack have some entries left? - if ($this->isUrlStackEmpty()) { - // Nothing to handle here - return; - } // END - if - + public function fillUrlStack () { $this->partialStub('Please implement this method.'); } } diff --git a/application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php b/application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php index e955d027f..ef6ade1bd 100644 --- a/application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php +++ b/application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php @@ -49,18 +49,12 @@ class CrawlerRssStartUrlSource extends BaseUrlSource implements UrlSource, Regis } /** - * Processes entries in the stack. + * Fills the URL stack with new entries from source * * @return void - * @todo ~10% done + * @todo 0% done */ - public function processStack () { - // Does the stack have some entries left? - if ($this->isUrlStackEmpty()) { - // Nothing to handle here - return; - } // END - if - + public function fillUrlStack () { $this->partialStub('Please implement this method.'); } } diff --git a/application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php b/application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php index 3fd94f31d..ba2c81e97 100644 --- a/application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php +++ b/application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php @@ -22,6 +22,25 @@ * along with this program. If not, see . */ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable { + /** + * Stack name for a CSV file + */ + const STACK_NAME_CSV_FILE = 'csv_file'; + + /** + * Stack name for a CSV entry + */ + const STACK_NAME_CSV_ENTRY = 'csv_entry'; + + /** + * Size of crawl (CSV) entry which is an indexed array: + * + * 0 = URL to crawl + * 1 = Crawl depth of URL + * 2 = Crawl depth of linked URLs (same other host only) + */ + const CRAWL_ENTRY_SIZE = 3; + /** * "Cached" CSV path */ @@ -38,14 +57,14 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R private $stackSourceInstance = NULL; /** - * Stack name for a CSV file + * "Imported" CSV files */ - const STACK_NAME_CSV_FILE = 'csv_file'; + private $csvFileImported = array(); /** - * "Imported" CSV files + * "Cached" separator for columns */ - private $csvFileImported = array(); + private $columnSeparator = ''; /** * Protected constructor @@ -68,8 +87,12 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R // Init stack instance $this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_url_source_stack_class'); - // Init stack + // Init stacks $this->getStackSourceInstance()->initStack(self::STACK_NAME_CSV_FILE); + $this->getStackSourceInstance()->initStack(self::STACK_NAME_CSV_ENTRY); + + // "Cache" column separator + $this->columnSeparator = $this->getConfigInstance()->getConfigEntry('crawler_url_list_column_separator'); } /** @@ -91,12 +114,12 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R $directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array_merge(array('.htaccess', '.', '..'), $this->csvFileImported)); // Debug message - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry); + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry); // Is it empty or wrong file extension? if ((empty($directoryEntry)) || (substr($directoryEntry, -4, 4) != '.csv')) { // Skip further processing - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!'); + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!'); return FALSE; } // END - if @@ -126,17 +149,75 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R return $sourceInstance; } + /** + * Enriches and saves the given CSV entry (array) in the assigned + * file-based stack. To such entry a lot more informations are added, such + * as which files shall be crawled and many more. + * + * @param $csvData Array with data from a CSV file + * @return void + */ + private function saveCsvDataInCrawlerQueue (array $csvData) { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData()=' . count($csvData) . ' - CALLED!'); + + // The array has 3 elements, later enhancements may accept more + assert(count($csvData) == self::CRAWL_ENTRY_SIZE); + + /* + * First converted the indexed array into an assoziative array. Don't + * forget to expand this array as well when you want to add another + * column to the CSV file. + */ + $csvArray = array( + self::CRAWL_JOB_ARRAY_START_URL => $csvData[0], + self::CRAWL_JOB_ARRAY_DEPTH => $csvData[1], + self::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2] + ); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvArray()=' . count($csvArray) . ' - BEFORE!'); + + // Then add more data to it + $this->enrichCrawlerQueueData($csvArray); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvArray()=' . count($csvArray) . ' - AFTER!'); + + /* + * Then enqueue it in the file stack. The local crawler "task" will + * then pick this up. + */ + $this->enqueueInFileStack($csvArray); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!'); + } + /** * Checks whether a CSV file has been loaded (added to the stack) * - * @return $isLoaded Whether a CSV file has been loaded + * @return $isAdded Whether a CSV file has been loaded */ private function isCsvFileAdded () { // Check whether the stacker is not empty - $isLoaded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_FILE)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_FILE))); + $isAdded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_FILE)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_FILE))); + + // Return the result + return $isAdded; + } + + /** + * Checks whether a CSV entry has been added to the stack + * + * @return $isAdded Whether a CSV entry has been added + */ + private function isCsvEntryAdded () { + // Check whether the stacker is not empty + $isAdded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_ENTRY)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_ENTRY))); // Return the result - return $isLoaded; + return $isAdded; } /** @@ -167,20 +248,80 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R } /** - * Parses the next stacked CSV by reading only one line from it. Then the - * read line is being validated and if found good being feed to the next + * Parses the next stacked CSV file by reading only one line from it. Then + * the read line is being validated and if found good being feed to the next * stack. The file is removed from stack only if it has been fully parsed. * * @return void */ - private function parseCsvEntry () { + private function parseCsvFile () { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!'); + + // Get next entry + $csvFileInstance = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_FILE); + + // Read full "CSV line" + $csvData = $csvFileInstance->readCsvFileLine($this->columnSeparator); + // Debug message - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!'); + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE)); + + // Expect always an array + assert(is_array($csvData)); + + // Is the array empty? + if (count($csvData) == 0) { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: File ' . $csvFileInstance->getFileName() . ' has been fully read.'); + + // Try to close it + $csvFileInstance->closeFile(); + + // This file as been fully read, so don't push it back on stack. + return; + } // END - if + + // ... with 3 elements, later enhancements may accept more + assert(count($csvData) == self::CRAWL_ENTRY_SIZE); + + /* + * Push the file back on stack as it may contain more entries. This way + * all files got rotated on stack which may improve crawler performance. + */ + $this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_FILE, $csvFileInstance); + + // Push array on next stack + $this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_ENTRY, $csvData); // Debug message - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!'); + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!'); } + /** + * Parses the next stacked CSV entry. + * + * @return void + */ + private function parseCsvEntry () { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!'); + + // Pop it from stack + $csvData = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_ENTRY); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE)); + + // It must have 3 elements (see method parseCsvFile() for details) + assert(count($csvData) == self::CRAWL_ENTRY_SIZE); + + // Save it in crawler queue (which will enrich it with way more informations + $this->saveCsvDataInCrawlerQueue($csvData); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!'); + } /** * Getter for stackSourceInstance variable @@ -192,19 +333,25 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R } /** - * Processes entries in the stack. + * Fills the URL stack with new entries from source * * @return void - * @todo ~20% done + * @todo ~40% done */ - public function processStack () { + public function fillUrlStack () { // Does the stack have some entries left? - if ($this->isCsvFileAdded()) { + if ($this->isCsvEntryAdded()) { /* * A CSV file has been found and "imported" (added to stack). Now * the file can be read line by line and checked every one of it. */ $this->parseCsvEntry(); + } elseif ($this->isCsvFileAdded()) { + /* + * A CSV file has been found and "imported" (added to stack). Now + * the file can be read line by line and checked every one of it. + */ + $this->parseCsvFile(); } elseif ($this->isCsvFileFound()) { /* * A file containing an URL list is found. Please note the format is @@ -212,12 +359,6 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R * depth, handling of 3rd-party URLs and such. */ $this->addCsvFile(); - } elseif (!$this->isUrlStackEmpty()) { - /* - * Handle next entry. This method will be called very often, so need - * to process more than one entry at a time. - */ - $this->processNextEntry(); } $this->partialStub('Please implement this method.'); diff --git a/application/hub/main/tasks/crawler/class_BaseUrlSourceTask.php b/application/hub/main/tasks/crawler/class_BaseUrlSourceTask.php new file mode 100644 index 000000000..9399ae89c --- /dev/null +++ b/application/hub/main/tasks/crawler/class_BaseUrlSourceTask.php @@ -0,0 +1,51 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2014 Hub Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class BaseUrlSourceTask extends BaseTask { + /** + * Protected constructor + * + * @param $className Name of the class + * @return void + */ + protected function __construct ($className) { + // Call parent constructor + parent::__construct($className); + } + + /** + * Initializes URL source task (to keep the constructor small) + * + * @return void + */ + protected function initUrlSourceTask () { + // Get source instance + $sourceInstance = UrlSourceObjectFactory::createUrlSourceInstance($this); + + // And set it here + $this->setUrlSourceInstance($sourceInstance); + } +} + +// [EOF] +?> diff --git a/application/hub/main/tasks/crawler/url_crawler/.htaccess b/application/hub/main/tasks/crawler/url_crawler/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/tasks/crawler/url_crawler/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/tasks/crawler/url_crawler/local/.htaccess b/application/hub/main/tasks/crawler/url_crawler/local/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/tasks/crawler/url_crawler/local/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php b/application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php new file mode 100644 index 000000000..3b7e3c18e --- /dev/null +++ b/application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php @@ -0,0 +1,72 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerLocalUrlCrawlerTask extends BaseTask implements Taskable, Visitable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $taskInstance An instance of a Visitable class + */ + public final static function createCrawlerLocalUrlCrawlerTask () { + // Get new instance + $taskInstance = new CrawlerLocalUrlCrawlerTask(); + + // Return the prepared instance + return $taskInstance; + } + + /** + * Accepts the visitor to process the visitor + * + * @param $visitorInstance An instance of a Visitor class + * @return void + * @todo Maybe visit some sub-objects + */ + public function accept (Visitor $visitorInstance) { + // Visit this task + $visitorInstance->visitTask($this); + } + + /** + * Executes the task + * + * @return void + * @todo 0% + */ + public function executeTask () { + $this->partialStub('Unimplemented task.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/tasks/crawler/url_crawler/remote/.htaccess b/application/hub/main/tasks/crawler/url_crawler/remote/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/tasks/crawler/url_crawler/remote/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php b/application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php new file mode 100644 index 000000000..465f1a305 --- /dev/null +++ b/application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php @@ -0,0 +1,72 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerRemoteUrlCrawlerTask extends BaseTask implements Taskable, Visitable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $taskInstance An instance of a Visitable class + */ + public final static function createCrawlerRemoteUrlCrawlerTask () { + // Get new instance + $taskInstance = new CrawlerRemoteUrlCrawlerTask(); + + // Return the prepared instance + return $taskInstance; + } + + /** + * Accepts the visitor to process the visitor + * + * @param $visitorInstance An instance of a Visitor class + * @return void + * @todo Maybe visit some sub-objects + */ + public function accept (Visitor $visitorInstance) { + // Visit this task + $visitorInstance->visitTask($this); + } + + /** + * Executes the task + * + * @return void + * @todo 0% + */ + public function executeTask () { + $this->partialStub('Unimplemented task.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/tasks/crawler/url_getter/.htaccess b/application/hub/main/tasks/crawler/url_getter/.htaccess deleted file mode 100644 index 3a4288278..000000000 --- a/application/hub/main/tasks/crawler/url_getter/.htaccess +++ /dev/null @@ -1 +0,0 @@ -Deny from all diff --git a/application/hub/main/tasks/crawler/url_getter/local/.htaccess b/application/hub/main/tasks/crawler/url_getter/local/.htaccess deleted file mode 100644 index 3a4288278..000000000 --- a/application/hub/main/tasks/crawler/url_getter/local/.htaccess +++ /dev/null @@ -1 +0,0 @@ -Deny from all diff --git a/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php b/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php deleted file mode 100644 index e3ecc6c9c..000000000 --- a/application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php +++ /dev/null @@ -1,72 +0,0 @@ - - * @version 0.0.0 - * @copyright Copyright (c) 2014 Crawler Developer Team - * @license GNU GPL 3.0 or any newer version - * @link http://www.ship-simu.org - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -class CrawlerLocalUrlGetterTask extends BaseTask implements Taskable, Visitable { - /** - * Protected constructor - * - * @return void - */ - protected function __construct () { - // Call parent constructor - parent::__construct(__CLASS__); - } - - /** - * Creates an instance of this class - * - * @return $taskInstance An instance of a Visitable class - */ - public final static function createCrawlerLocalUrlGetterTask () { - // Get new instance - $taskInstance = new CrawlerLocalUrlGetterTask(); - - // Return the prepared instance - return $taskInstance; - } - - /** - * Accepts the visitor to process the visitor - * - * @param $visitorInstance An instance of a Visitor class - * @return void - * @todo Maybe visit some sub-objects - */ - public function accept (Visitor $visitorInstance) { - // Visit this task - $visitorInstance->visitTask($this); - } - - /** - * Executes the task - * - * @return void - * @todo 0% - */ - public function executeTask () { - $this->partialStub('Unimplemented task.'); - } -} - -// [EOF] -?> diff --git a/application/hub/main/tasks/crawler/url_getter/remote/.htaccess b/application/hub/main/tasks/crawler/url_getter/remote/.htaccess deleted file mode 100644 index 3a4288278..000000000 --- a/application/hub/main/tasks/crawler/url_getter/remote/.htaccess +++ /dev/null @@ -1 +0,0 @@ -Deny from all diff --git a/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php b/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php deleted file mode 100644 index 9dc5b9d88..000000000 --- a/application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php +++ /dev/null @@ -1,72 +0,0 @@ - - * @version 0.0.0 - * @copyright Copyright (c) 2014 Crawler Developer Team - * @license GNU GPL 3.0 or any newer version - * @link http://www.ship-simu.org - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -class CrawlerRemoteUrlGetterTask extends BaseTask implements Taskable, Visitable { - /** - * Protected constructor - * - * @return void - */ - protected function __construct () { - // Call parent constructor - parent::__construct(__CLASS__); - } - - /** - * Creates an instance of this class - * - * @return $taskInstance An instance of a Visitable class - */ - public final static function createCrawlerRemoteUrlGetterTask () { - // Get new instance - $taskInstance = new CrawlerRemoteUrlGetterTask(); - - // Return the prepared instance - return $taskInstance; - } - - /** - * Accepts the visitor to process the visitor - * - * @param $visitorInstance An instance of a Visitor class - * @return void - * @todo Maybe visit some sub-objects - */ - public function accept (Visitor $visitorInstance) { - // Visit this task - $visitorInstance->visitTask($this); - } - - /** - * Executes the task - * - * @return void - * @todo 0% - */ - public function executeTask () { - $this->partialStub('Unimplemented task.'); - } -} - -// [EOF] -?> diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource index b62a9df06..284cf5d32 100644 --- a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource +++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSource @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -class CrawlerUrlSource???Task extends BaseTask implements Taskable, Visitable { +class CrawlerUrlSource???Task extends BaseUrlSourceTask implements Taskable, Visitable { /** * Protected constructor * @@ -63,8 +63,20 @@ class CrawlerUrlSource???Task extends BaseTask implements Taskable, Visitable { * @return void */ public function executeTask () { - // Get the URL source instance and announce us - UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack(); + // Get source instance + $sourceInstance = $this->getUrlSourceInstance(); + + // Is it not set? + if (is_null($sourceInstance)) { + // Initialize it + $this->initUrlSourceTask(); + + // And re-get it + $sourceInstance = $this->getUrlSourceInstance(); + } // END - if + + // Get the URL source instance and fill the stack with crawl entries + $sourceInstance->fillUrlStack(); } } diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php index 14236c6ff..823ba3336 100644 --- a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php +++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -class CrawlerUrlSourceFoundRssTask extends BaseTask implements Taskable, Visitable { +class CrawlerUrlSourceFoundRssTask extends BaseUrlSourceTask implements Taskable, Visitable { /** * Protected constructor * @@ -63,8 +63,20 @@ class CrawlerUrlSourceFoundRssTask extends BaseTask implements Taskable, Visitab * @return void */ public function executeTask () { - // Get the URL source instance and announce us - UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack(); + // Get source instance + $sourceInstance = $this->getUrlSourceInstance(); + + // Is it not set? + if (is_null($sourceInstance)) { + // Initialize it + $this->initUrlSourceTask(); + + // And re-get it + $sourceInstance = $this->getUrlSourceInstance(); + } // END - if + + // Get the URL source instance and fill the stack with crawl entries + $sourceInstance->fillUrlStack(); } } diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php index 9fdb71d1d..eb2839eb9 100644 --- a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php +++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -class CrawlerUrlSourceLocalStartTask extends BaseTask implements Taskable, Visitable { +class CrawlerUrlSourceLocalStartTask extends BaseUrlSourceTask implements Taskable, Visitable { /** * Protected constructor * @@ -63,8 +63,20 @@ class CrawlerUrlSourceLocalStartTask extends BaseTask implements Taskable, Visit * @return void */ public function executeTask () { - // Get the URL source instance and announce us - UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack(); + // Get source instance + $sourceInstance = $this->getUrlSourceInstance(); + + // Is it not set? + if (is_null($sourceInstance)) { + // Initialize it + $this->initUrlSourceTask(); + + // And re-get it + $sourceInstance = $this->getUrlSourceInstance(); + } // END - if + + // Get the URL source instance and fill the stack with crawl entries + $sourceInstance->fillUrlStack(); } } diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php index 413c7ad5d..c414ce2bc 100644 --- a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php +++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -class CrawlerUrlSourceRssStartTask extends BaseTask implements Taskable, Visitable { +class CrawlerUrlSourceRssStartTask extends BaseUrlSourceTask implements Taskable, Visitable { /** * Protected constructor * @@ -63,8 +63,20 @@ class CrawlerUrlSourceRssStartTask extends BaseTask implements Taskable, Visitab * @return void */ public function executeTask () { - // Get the URL source instance and announce us - UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack(); + // Get source instance + $sourceInstance = $this->getUrlSourceInstance(); + + // Is it not set? + if (is_null($sourceInstance)) { + // Initialize it + $this->initUrlSourceTask(); + + // And re-get it + $sourceInstance = $this->getUrlSourceInstance(); + } // END - if + + // Get the URL source instance and fill the stack with crawl entries + $sourceInstance->fillUrlStack(); } } diff --git a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php index 7330dda2d..50875ab1e 100644 --- a/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php +++ b/application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php @@ -21,7 +21,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -class CrawlerUrlSourceUploadedListTask extends BaseTask implements Taskable, Visitable { +class CrawlerUrlSourceUploadedListTask extends BaseUrlSourceTask implements Taskable, Visitable { /** * Protected constructor * @@ -63,8 +63,20 @@ class CrawlerUrlSourceUploadedListTask extends BaseTask implements Taskable, Vis * @return void */ public function executeTask () { - // Get the URL source instance and announce us - UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack(); + // Get source instance + $sourceInstance = $this->getUrlSourceInstance(); + + // Is it not set? + if (is_null($sourceInstance)) { + // Initialize it + $this->initUrlSourceTask(); + + // And re-get it + $sourceInstance = $this->getUrlSourceInstance(); + } // END - if + + // Get the URL source instance and fill the stack with crawl entries + $sourceInstance->fillUrlStack(); } } diff --git a/docs/TODOs.txt b/docs/TODOs.txt index 746e1f2b9..9f17dbc70 100644 --- a/docs/TODOs.txt +++ b/docs/TODOs.txt @@ -1,7 +1,7 @@ ### WARNING: THIS FILE IS AUTO-GENERATED BY ./todo-builder.sh ### ### DO NOT EDIT THIS FILE. ### -./application/hub/config.php:775:// @TODO This and the next value is very static again -./application/hub/config.php:839:// @TODO This is very static, rewrite it to more flexible +./application/hub/config.php:772:// @TODO This and the next value is very static again +./application/hub/config.php:836:// @TODO This is very static, rewrite it to more flexible ./application/hub/interfaces/apt-proxy/class_AptProxy.php:10: * @todo We need to find a better name for this interface ./application/hub/interfaces/blocks/class_Minable.php:10: * @todo We need to find a better name for this interface ./application/hub/interfaces/chat/class_Chatter.php:10: * @todo We need to find a better name for this interface @@ -15,7 +15,7 @@ ./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:122: * @todo Add minimum/maximum age limitations ./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:132: * @todo Add timestamp to dataset instance ./application/hub/main/chains/class_PackageFilterChain.php:54: * @todo This may be slow if a message with a lot tags arrived -./application/hub/main/class_BaseHubSystem.php:577: // @TODO On some systems it is 134, on some 107? +./application/hub/main/class_BaseHubSystem.php:604: // @TODO On some systems it is 134, on some 107? ./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:107: * @todo Should we add some more filters? ./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:58: * @todo Try to create a AptProxyActivationTask or so ./application/hub/main/commands/console/class_HubConsoleChatCommand.php:107: * @todo Should we add some more filters? @@ -40,9 +40,11 @@ ./application/hub/main/dht/class_BaseDht.php:253: * @todo Switch flag 'accept_bootstrap' ./application/hub/main/dht/class_BaseDht.php:86: * @todo Find more to do here ./application/hub/main/dht/node/class_NodeDhtFacade.php:61: * @todo Does this data need to be enriched with more meta data? -./application/hub/main/discovery/protocol/class_ProtocolDiscovery.php:71: // @TODO Add some validation here??? +./application/hub/main/discovery/protocol/class_ProtocolDiscovery.php:94: // @TODO Add some validation here??? +./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:115: // @TODO Unfinished: $this->getListInstance()->addEntry('unl', $decodedData[NetworkPackage::PACKAGE_DATA_RECIPIENT]); ./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:86: * @todo Add some validation of recipient field, e.g. an Universal Node Locator is found -./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:87: * @todo The if() does only check for TCP, not UDP, e.g. try to get a $handlerInstance here +./application/hub/main/discovery/recipient/package/class_PackageRecipientDiscovery.php:87: * @todo Enrich both messages with recipient data +./application/hub/main/discovery/recipient/socket/class_PackageSocketDiscovery.php:159: // @TODO FIXME: I don't like these abuse of variables, better strict types ./application/hub/main/factories/handler/class_ProtocolHandlerFactory.php:10: * @todo Unfinished stuff ./application/hub/main/factories/socket/class_SocketFactory.php:10: * @todo Find an interface for hub helper ./application/hub/main/filter/apt-proxy/class_AptProxyInitializationFilter.php:54: * @todo 0% done @@ -91,18 +93,20 @@ ./application/hub/main/handler/message-types/self-connect/class_NodeMessageSelfConnectHandler.php:71: // @TODO Throw an exception here instead of dying ./application/hub/main/handler/network/class_BaseRawDataHandler.php:148: * @todo This method will be moved to a better place ./application/hub/main/handler/network/udp/class_UdpRawDataHandler.php:58: * @todo 0% +./application/hub/main/handler/protocol/class_BaseProtocolHandler.php:110: * @TODO If you know why, please fix and explain it to me. ./application/hub/main/handler/tasks/class_TaskHandler.php:139: // @TODO Messurement can be added around this call ./application/hub/main/helper/class_BaseHubSystemHelper.php:87: * @todo 0% done -./application/hub/main/helper/connection/class_BaseConnectionHelper.php:204: // @TODO Move this to the socket error handler -./application/hub/main/helper/connection/class_BaseConnectionHelper.php:232: * @todo Rewrite the while() loop to a iterator to not let the software stay very long here -./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:10: * @todo Find an interface for hub helper -./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:110: // @TODO Rewrite this test for UNLs -./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:117: // @TODO Rewrite this test for UNLs -./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:147: * @todo We may want to implement a filter for ease notification of other objects like our pool -./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:49: * @todo $errorCode/-Message are now in handleSocketError()'s call-back methods -./application/hub/main/helper/connection/tcp/class_TcpConnectionHelper.php:89: // @TODO The whole resolving part should be moved out and made more configurable -./application/hub/main/helper/connection/udp/class_UdpConnectionHelper.php:10: * @todo Find an interface for hub helper -./application/hub/main/helper/connection/udp/class_UdpConnectionHelper.php:56: * @todo Implement a filter for ease notification of other objects like the pool +./application/hub/main/helper/connection/ipv4/class_BaseIpV4ConnectionHelper.php:105: * @todo Rewrite the while() loop to a iterator to not let the software stay very long here +./application/hub/main/helper/connection/ipv4/class_BaseIpV4ConnectionHelper.php:10: * @todo Find an interface for hub helper +./application/hub/main/helper/connection/ipv4/class_BaseIpV4ConnectionHelper.php:77: // @TODO Move this to the socket error handler +./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:10: * @todo Find an interface for hub helper +./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:118: // @TODO Rewrite this test for UNLs +./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:125: // @TODO Rewrite this test for UNLs +./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:155: * @todo We may want to implement a filter for ease notification of other objects like our pool +./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:49: * @todo $errorCode/-Message are now in handleSocketError()'s call-back methods +./application/hub/main/helper/connection/ipv4/tcp/class_TcpConnectionHelper.php:89: // @TODO The whole resolving part should be moved out and made more configurable +./application/hub/main/helper/connection/ipv4/udp/class_UdpConnectionHelper.php:10: * @todo Find an interface for hub helper +./application/hub/main/helper/connection/ipv4/udp/class_UdpConnectionHelper.php:56: * @todo Implement a filter for ease notification of other objects like the pool ./application/hub/main/helper/dht/class_DhtBootstrapHelper.php:10: * @todo Find an interface for hub helper ./application/hub/main/helper/dht/class_DhtPublishEntryHelper.php:10: * @todo Find an interface for hub helper ./application/hub/main/helper/node/announcement/class_NodeAnnouncementHelper.php:10: * @todo Find an interface for hub helper @@ -123,7 +127,7 @@ ./application/hub/main/iterator/pool/tasks/class_TaskPoolIterator.php:11: * @todo latency-based iteration or similar approaches ./application/hub/main/listener/tcp/class_TcpListener.php:252: // @TODO Does this work on Windozer boxes??? ./application/hub/main/listener/udp/class_UdpListener.php:153: * @todo ~50% done -./application/hub/main/lists/class_BaseList.php:276: // @TODO Extend this somehow? +./application/hub/main/lists/class_BaseList.php:305: // @TODO Extend this somehow? ./application/hub/main/lists/groups/class_ListGroupList.php:61: * @todo 0% done ./application/hub/main/miner/chash/class_HubChashMiner.php:108: * @todo Implement this method ./application/hub/main/miner/chash/class_HubChashMiner.php:138: * @todo 0% done @@ -136,8 +140,8 @@ ./application/hub/main/nodes/class_BaseHubNode.php:432: * @todo Change the first if() block to check for a specific state ./application/hub/main/nodes/class_BaseHubNode.php:638: * @todo Add checking if this node has been announced to the sender node ./application/hub/main/nodes/class_BaseHubNode.php:658: * @todo Add checking if this node has been announced to the sender node -./application/hub/main/nodes/class_BaseHubNode.php:761: * @todo Find more to do here -./application/hub/main/nodes/class_BaseHubNode.php:774: * @todo Handle thrown exception +./application/hub/main/nodes/class_BaseHubNode.php:763: * @todo Find more to do here +./application/hub/main/nodes/class_BaseHubNode.php:776: * @todo Handle thrown exception ./application/hub/main/nodes/list/class_HubListNode.php:58: * @todo Implement more bootstrap steps ./application/hub/main/nodes/list/class_HubListNode.php:79: // @TODO Add some filters here ./application/hub/main/nodes/list/class_HubListNode.php:88: * @todo 0% done @@ -147,14 +151,14 @@ ./application/hub/main/nodes/regular/class_HubRegularNode.php:58: * @todo Implement this method ./application/hub/main/nodes/regular/class_HubRegularNode.php:79: // @TODO Add some filters here ./application/hub/main/nodes/regular/class_HubRegularNode.php:88: * @todo 0% done -./application/hub/main/package/class_NetworkPackage.php:1150: * @todo This may be enchanced for outgoing packages? -./application/hub/main/package/class_NetworkPackage.php:1181: * @todo Unsupported feature of "signed" messages commented out -./application/hub/main/package/class_NetworkPackage.php:1270: * @todo Implement verification of all sent tags here? +./application/hub/main/package/class_NetworkPackage.php:1167: * @todo This may be enchanced for outgoing packages? +./application/hub/main/package/class_NetworkPackage.php:1198: * @todo Unsupported feature of "signed" messages commented out +./application/hub/main/package/class_NetworkPackage.php:1287: * @todo Implement verification of all sent tags here? ./application/hub/main/package/class_NetworkPackage.php:23: * @todo Needs to add functionality for handling the object's type ./application/hub/main/package/class_NetworkPackage.php:338: // @TODO md5() is very weak, but it needs to be fast ./application/hub/main/package/class_NetworkPackage.php:412: // @TODO md5() is very weak, but it needs to be fast -./application/hub/main/package/class_NetworkPackage.php:578: // @TODO We may want to do somthing more here? -./application/hub/main/package/class_NetworkPackage.php:613: * @todo Unfinished area, signatures are currently NOT fully supported +./application/hub/main/package/class_NetworkPackage.php:595: // @TODO We may want to do somthing more here? +./application/hub/main/package/class_NetworkPackage.php:630: * @todo Unfinished area, signatures are currently NOT fully supported ./application/hub/main/package/fragmenter/class_PackageFragmenter.php:275: * @todo Implement a way to send non-announcement packages with extra-salt ./application/hub/main/package/fragmenter/class_PackageFragmenter.php:370: // @TODO This assert broke packages where the hash chunk was very large: assert(strlen($rawData) <= NetworkPackage::TCP_PACKAGE_SIZE); ./application/hub/main/package/fragmenter/class_PackageFragmenter.php:441: * @todo $helperInstance is unused @@ -168,14 +172,13 @@ ./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:86: * @todo ~5% done ./application/hub/main/recipient/dht/class_DhtRecipient.php:76: // @TODO Unfinished ./application/hub/main/recipient/self/class_SelfRecipient.php:61: // @TODO Add more checks on data -./application/hub/main/registry/socket/class_SocketRegistry.php:75: // @TODO Tested again base class, rewrite it to a generic interface! ./application/hub/main/resolver/protocol/tcp/class_TcpProtocolResolver.php:57: * @todo 0% done ./application/hub/main/resolver/state/peer/class_PeerStateResolver.php:59: * @todo ~30% done ./application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php:52: * @todo 0% done -./application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php:55: * @todo ~10% done -./application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php:55: * @todo ~10% done -./application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php:55: * @todo ~10% done -./application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php:196: * @todo ~20% done +./application/hub/main/source/urls/class_CrawlerFoundRssUrlSource.php:55: * @todo 0% done +./application/hub/main/source/urls/class_CrawlerLocalStartUrlSource.php:55: * @todo 0% done +./application/hub/main/source/urls/class_CrawlerRssStartUrlSource.php:55: * @todo 0% done +./application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php:327: * @todo ~40% done ./application/hub/main/states/communicator/init/class_CommunicatorInitState.php:60: * @todo 0% done? ./application/hub/main/states/crawler/active/class_CrawlerActiveState.php:60: * @todo 0% done ./application/hub/main/states/crawler/booting/class_CrawlerBootingState.php:60: * @todo 0% done @@ -204,10 +207,10 @@ ./application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php:64: * @todo 0% ./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:53: * @todo Maybe visit some sub-objects ./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:64: * @todo 0% -./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:53: * @todo Maybe visit some sub-objects -./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:64: * @todo 0% -./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:53: * @todo Maybe visit some sub-objects -./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:64: * @todo 0% +./application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/url_crawler/local/class_CrawlerLocalUrlCrawlerTask.php:64: * @todo 0% +./application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/url_crawler/remote/class_CrawlerRemoteUrlCrawlerTask.php:64: * @todo 0% ./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php:53: * @todo Maybe visit some sub-objects ./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php:53: * @todo Maybe visit some sub-objects ./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php:53: * @todo Maybe visit some sub-objects @@ -248,8 +251,8 @@ ./application/hub/main/template/requests/class_XmlRequestNodeListTemplateEngine.php:10: * @todo This template engine does not make use of setTemplateType() ./application/hub/main/template/requests/class_XmlRequestNodeListTemplateEngine.php:74: * @todo Find something useful with this! ./application/hub/main/tools/class_HubTools.php:158: // @TODO ((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])):([0-9]{3,5}) -./application/hub/main/tools/class_HubTools.php:248: // @TODO Find a better validation than empty() -./application/hub/main/tools/class_HubTools.php:276: // @TODO Find a better validation than empty() +./application/hub/main/tools/class_HubTools.php:263: // @TODO Find a better validation than empty() +./application/hub/main/tools/class_HubTools.php:291: // @TODO Find a better validation than empty() ./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:163: // @TODO Bad check on UNL, better use a proper validator ./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:209: // @TODO Bad check on UNL, better use a proper validator ./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:442: // @TODO Unimplemented part @@ -257,16 +260,17 @@ ./application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php:540: * @todo Add timestamp to dataset instance ./application/hub/main/wrapper/states/class_PeerStateLookupDatabaseWrapper.php:174: * @todo Unfinished area ./application/hub/main/wrapper/states/class_PeerStateLookupDatabaseWrapper.php:216: * @todo Unfinished area +./core/inc/classes.php:10: * @todo Minimize these includes ./core/inc/classes/exceptions/main/class_MissingMethodException.php:13: * @todo Try to rewrite user/guest login classes and mark this exception as deprecated ./core/inc/classes/exceptions/main/class_NoConfigEntryException.php:10: * @todo Rename this class to NoFoundEntryException ./core/inc/classes/interfaces/class_FrameworkInterface.php:11: * @todo Find a better name for this interface ./core/inc/classes/interfaces/criteria/extended/class_LocalSearchCriteria.php:30: * @todo Find a nice casting here. (int) allows until and including 32766. ./core/inc/classes/interfaces/criteria/extended/class_LocalSearchCriteria.php:54: * @todo Find a nice casting here. (int) allows until and including 32766. -./core/inc/classes/main/class_BaseFrameworkSystem.php:1927: * @todo Write a logging mechanism for productive mode -./core/inc/classes/main/class_BaseFrameworkSystem.php:1942: // @TODO Finish this part! -./core/inc/classes/main/class_BaseFrameworkSystem.php:240: // @todo Try to clean these constants up -./core/inc/classes/main/class_BaseFrameworkSystem.php:465: // @TODO __CLASS__ does always return BaseFrameworkSystem but not the extending (=child) class -./core/inc/classes/main/class_BaseFrameworkSystem.php:539: * @todo SearchableResult and UpdateableResult shall have a super interface to use here +./core/inc/classes/main/class_BaseFrameworkSystem.php:1977: * @todo Write a logging mechanism for productive mode +./core/inc/classes/main/class_BaseFrameworkSystem.php:1992: // @TODO Finish this part! +./core/inc/classes/main/class_BaseFrameworkSystem.php:250: // @todo Try to clean these constants up +./core/inc/classes/main/class_BaseFrameworkSystem.php:475: // @TODO __CLASS__ does always return BaseFrameworkSystem but not the extending (=child) class +./core/inc/classes/main/class_BaseFrameworkSystem.php:549: * @todo SearchableResult and UpdateableResult shall have a super interface to use here ./core/inc/classes/main/commands/web/class_WebLoginAreaCommand.php:64: * @todo Add some stuff here: Some personal data, app/game related data ./core/inc/classes/main/commands/web/class_WebProblemCommand.php:58: * @todo 0% done ./core/inc/classes/main/commands/web/class_WebStatusCommand.php:58: * @todo 0% done @@ -287,13 +291,13 @@ ./core/inc/classes/main/controller/web/class_WebStatusController.php:10: * @todo This controller shall still provide some headlines for sidebars ./core/inc/classes/main/criteria/search/class_SearchCriteria.php:102: * @todo Find a nice casting here. (int) allows until and including 32766. ./core/inc/classes/main/criteria/search/class_SearchCriteria.php:70: * @todo Find a nice casting here. (int) allows until and including 32766. -./core/inc/classes/main/database/databases/class_LocalFileDatabase.php:327: * @todo Do some checks on the database directory and files here -./core/inc/classes/main/database/databases/class_LocalFileDatabase.php:616: * @todo Add more generic non-public data for removal +./core/inc/classes/main/database/backend/class_CachedLocalFileDatabase.php:327: * @todo Do some checks on the database directory and files here +./core/inc/classes/main/database/backend/class_CachedLocalFileDatabase.php:616: * @todo Add more generic non-public data for removal ./core/inc/classes/main/decorator/template/class_XmlRewriterTemplateDecorator.php:427: * @todo Find something useful with this! ./core/inc/classes/main/discovery/payment/class_LocalPaymentDiscovery.php:85: * @todo 0% done -./core/inc/classes/main/file_directories/class_BaseFileIo.php:162: * @todo Handle seekStatus ./core/inc/classes/main/file_directories/class_BaseFile.php:135: * @todo ~10% done? ./core/inc/classes/main/file_directories/class_BaseFile.php:148: * @todo Handle seekStatus +./core/inc/classes/main/file_directories/class_BaseFileIo.php:162: * @todo Handle seekStatus ./core/inc/classes/main/file_directories/directory/class_FrameworkDirectoryPointer.php:68: * @todo Get rid of inConstructor, could be old-lost code. ./core/inc/classes/main/file_directories/io_stream/class_FileIoStream.php:270: * @todo 0% done ./core/inc/classes/main/file_directories/io_stream/class_FileIoStream.php:74: * @todo This method needs heavy rewrite @@ -365,7 +369,6 @@ ./core/inc/classes/middleware/compressor/class_CompressorChannel.php:103: // @TODO Is there a configurable fall-back compressor needed, or is NullCompressor okay? ./core/inc/classes/middleware/debug/class_DebugMiddleware.php:113: // @TODO Initialization phase ./core/inc/classes/middleware/io/class_FileIoHandler.php:174: * @todo 0% done -./core/inc/classes.php:10: * @todo Minimize these includes ./core/inc/classes/third_party/api/wernisportal/class_WernisApi.php:10: * @todo Out-dated since 0.6-BETA ./core/inc/config/class_FrameworkConfiguration.php:115: * @todo This method encapsulates a deprecated PHP function and should be deprecated, too. ./core/inc/config/class_FrameworkConfiguration.php:223: * @todo We have to add some more entries from $_SERVER here @@ -377,16 +380,17 @@ ./core/inc/loader/class_ClassLoader.php:319: /* @TODO: Do not exit here. */ ./core/inc/output.php:11: * @todo Minimize these includes ./core/inc/selector.php:11: * @todo Minimize these includes +./core/index.php:43: * @todo This method is old code and needs heavy rewrite and should be moved to ApplicationHelper ./index.php:43: * @todo This method is old code and needs heavy rewrite and should be moved to ApplicationHelper ### ### DEPRECATION FOLLOWS: ### ### ./application/hub/main/nodes/class_BaseHubNode.php:46: * @deprecated +./core/inc/classes.php:9: * @deprecated ./core/inc/classes/exceptions/main/class_MissingMethodException.php:14: * @deprecated Please do no longer use this exception ./core/inc/classes/interfaces/database/backend/class_DatabaseFrontendInterface.php:2:// @DEPRECATED ./core/inc/classes/interfaces/database/frontend/class_DatabaseFrontendInterface.php:2:// @DEPRECATED ./core/inc/classes/main/database/class_BaseDatabaseFrontend.php:2:// @DEPRECATED ./core/inc/classes/main/handler/class_BaseHandler.php:2:// @DEPRECATED ./core/inc/classes/main/handler/raw_data/class_BaseRawDataHandler.php:2:// @DEPRECATED -./core/inc/classes.php:9: * @deprecated ./core/inc/database.php:10: * @deprecated ./core/inc/hooks.php:2:// @DEPRECATED ./core/inc/includes.php:10: * @deprecated