From 9c3e6306f211737d6d48dbb6a581a2afbe6f0ff2 Mon Sep 17 00:00:00 2001 From: Roland Haeder Date: Mon, 12 May 2014 21:21:19 +0200 Subject: [PATCH] Added crawler stuff: - Added scanner class/interface and task class. - Added a scanner class for looking for uploaded lists. They must be located in data/url_lists/ . - Rewritten both factories (scanner, source) to use createObjectByConfiguredName() instead of direct createObjectByName(). This allows to set another class if you don't like the original implementation. Signed-off-by: Roland Haeder --- application/hub/config.php | 42 +++++++++++ application/hub/interfaces/scanner/.htaccess | 1 + .../hub/interfaces/scanner/class_Scanner.php | 34 +++++++++ .../source/urls/class_UrlSource.php | 6 ++ .../hub/main/factories/scanner/.htaccess | 1 + .../scanner/class_ScannerObjectFactory.php | 72 +++++++++++++++++++ .../url/class_UrlSourceObjectFactory.php | 15 ++-- ...ss_CrawlerTaskHandlerInitializerFilter.php | 4 ++ application/hub/main/scanner/.htaccess | 1 + application/hub/main/scanner/class_ | 60 ++++++++++++++++ .../hub/main/scanner/class_BaseScanner.php | 38 ++++++++++ .../hub/main/scanner/crawler/.htaccess | 1 + .../hub/main/scanner/crawler/class_Crawler | 60 ++++++++++++++++ .../scanner/crawler/uploaded_list/.htaccess | 1 + .../class_CrawlerUploadedListScanner.php | 60 ++++++++++++++++ .../main/source/urls/class_CrawlerUrlSource | 10 +++ .../urls/class_CrawlerUrlSourceFoundRss.php | 10 +++ .../urls/class_CrawlerUrlSourceLocalStart.php | 10 +++ .../urls/class_CrawlerUrlSourceRssStart.php | 10 +++ .../class_CrawlerUrlSourceUploadedList.php | 10 +++ .../hub/main/tasks/crawler/scanner/.htaccess | 1 + .../main/tasks/crawler/scanner/class_Crawler | 72 +++++++++++++++++++ .../class_CrawlerUploadedListScannerTask.php | 72 +++++++++++++++++++ docs/TODOs.txt | 59 ++++++++++----- 24 files changed, 625 insertions(+), 25 deletions(-) create mode 100644 application/hub/interfaces/scanner/.htaccess create mode 100644 
application/hub/interfaces/scanner/class_Scanner.php create mode 100644 application/hub/main/factories/scanner/.htaccess create mode 100644 application/hub/main/factories/scanner/class_ScannerObjectFactory.php create mode 100644 application/hub/main/scanner/.htaccess create mode 100644 application/hub/main/scanner/class_ create mode 100644 application/hub/main/scanner/class_BaseScanner.php create mode 100644 application/hub/main/scanner/crawler/.htaccess create mode 100644 application/hub/main/scanner/crawler/class_Crawler create mode 100644 application/hub/main/scanner/crawler/uploaded_list/.htaccess create mode 100644 application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php create mode 100644 application/hub/main/tasks/crawler/scanner/.htaccess create mode 100644 application/hub/main/tasks/crawler/scanner/class_Crawler create mode 100644 application/hub/main/tasks/crawler/scanner/class_CrawlerUploadedListScannerTask.php diff --git a/application/hub/config.php b/application/hub/config.php index ccd06b2e4..8536140c1 100644 --- a/application/hub/config.php +++ b/application/hub/config.php @@ -983,9 +983,24 @@ $cfg->setConfigEntry('task_miner_test_genesis_block_producer_interval_delay', 10 // CFG: TASK-MINER-TEST-GENESIC-BLOCK-PRODUCER-MAX-RUNS $cfg->setConfigEntry('task_miner_test_genesis_block_producer_max_runs', 1); +// CFG: MINER_NODE-COMMUNICATOR-TASK-CLASS +$cfg->setConfigEntry('miner_node_communicator_task_class', 'MinerNodeCommunicatorTask'); + +// CFG: TASK-MINER_NODE-COMMUNICATOR-STARTUP-DELAY +$cfg->setConfigEntry('task_miner_node_communicator_startup_delay', 500); + +// CFG: TASK-MINER_NODE-COMMUNICATOR-INTERVAL-DELAY +$cfg->setConfigEntry('task_miner_node_communicator_interval_delay', 250); + +// CFG: TASK-MINER_NODE-COMMUNICATOR-MAX-RUNS +$cfg->setConfigEntry('task_miner_node_communicator_max_runs', 0); + // CFG: MINER-TEST-GENESIS-BLOCK-PRODUCER-CLASS $cfg->setConfigEntry('miner_test_genesis_block_producer_class', 
'MinerTestGenesisBlockProducer'); +// CFG: MINER-NODE-COMMUNICATOR-CLASS +$cfg->setConfigEntry('miner_node_communicator_class', 'MinerNodeCommunicator'); + /////////////////////////////////////////////////////////////////////////////// // Cruncher configuration /////////////////////////////////////////////////////////////////////////////// @@ -1217,6 +1232,21 @@ $cfg->setConfigEntry('crawler_booting_state_class', 'CrawlerBootingState'); // CFG: COMMUNICATOR-INIT-STATE-CLASS $cfg->setConfigEntry('communicator_init_state_class', 'CommunicatorInitState'); +// CFG: CRAWLER-UPLOADED-LIST-SCANNER +$cfg->setConfigEntry('crawler_uploaded_list_scanner', 'CrawlerUploadedListScanner'); + +// CFG: CRAWLER-URL-SOURCE-LOCAL-START-CLASS +$cfg->setConfigEntry('crawler_url_source_local_start_class', 'CrawlerUrlSourceLocalStart'); + +// CFG: CRAWLER-URL-SOURCE-UPLOADED-LIST-CLASS +$cfg->setConfigEntry('crawler_url_source_uploaded_list_class', 'CrawlerUrlSourceUploadedList'); + +// CFG: CRAWLER-URL-SOURCE-RSS-START-CLASS +$cfg->setConfigEntry('crawler_url_source_rss_start_class', 'CrawlerUrlSourceRssStart'); + +// CFG: CRAWLER-URL-SOURCE-FOUND-RSS-CLASS +$cfg->setConfigEntry('crawler_url_source_found_rss_class', 'CrawlerUrlSourceFoundRss'); + // CFG: CRAWLER-NODE-COMMUNICATOR-TASK-CLASS $cfg->setConfigEntry('crawler_node_communicator_task_class', 'CrawlerNodeCommunicatorTask'); @@ -1373,6 +1403,18 @@ $cfg->setConfigEntry('task_crawler_url_source_found_rss_interval_delay', 150); // CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-MAX-RUNS $cfg->setConfigEntry('task_crawler_url_source_found_rss_max_runs', 0); +// CFG: CRAWLER-UPLOADED-LIST-SCANNER-TASK-CLASS +$cfg->setConfigEntry('crawler_uploaded_list_scanner_task_class', 'CrawlerUploadedListScannerTask'); + +// CFG: TASK-CRAWLER-UPLOADED-LIST-SCANNER-STARTUP-DELAY +$cfg->setConfigEntry('task_crawler_uploaded_list_scanner_startup_delay', 500); + +// CFG: TASK-CRAWLER-UPLOADED-LIST-SCANNER-INTERVAL-DELAY 
+$cfg->setConfigEntry('task_crawler_uploaded_list_scanner_interval_delay', 1000); + +// CFG: TASK-CRAWLER-UPLOADED-LIST-SCANNER-MAX-RUNS +$cfg->setConfigEntry('task_crawler_uploaded_list_scanner_max_runs', 0); + /////////////////////////////////////////////////////////////////////////////// // HTTP Configuration /////////////////////////////////////////////////////////////////////////////// diff --git a/application/hub/interfaces/scanner/.htaccess b/application/hub/interfaces/scanner/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/interfaces/scanner/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/interfaces/scanner/class_Scanner.php b/application/hub/interfaces/scanner/class_Scanner.php new file mode 100644 index 000000000..7d697a069 --- /dev/null +++ b/application/hub/interfaces/scanner/class_Scanner.php @@ -0,0 +1,34 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +interface Scanner extends FrameworkInterface { + /** + * Runs the scanner (please no loops here) + * + * @return void + */ + function execute (); +} + +// [EOF] +?> diff --git a/application/hub/interfaces/source/urls/class_UrlSource.php b/application/hub/interfaces/source/urls/class_UrlSource.php index 03d51cac7..19bba42a4 100644 --- a/application/hub/interfaces/source/urls/class_UrlSource.php +++ b/application/hub/interfaces/source/urls/class_UrlSource.php @@ -22,6 +22,12 @@ * along with this program. If not, see . */ interface UrlSource extends Source { + /** + * Processes entries in the stack. + * + * @return void + */ + function processStack (); } // [EOF] diff --git a/application/hub/main/factories/scanner/.htaccess b/application/hub/main/factories/scanner/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/factories/scanner/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/factories/scanner/class_ScannerObjectFactory.php b/application/hub/main/factories/scanner/class_ScannerObjectFactory.php new file mode 100644 index 000000000..c92d3bc32 --- /dev/null +++ b/application/hub/main/factories/scanner/class_ScannerObjectFactory.php @@ -0,0 +1,72 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class ScannerObjectFactory extends ObjectFactory { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Returns a singleton (registry-based) Scanner instance + * + * @param $taskInstance An instance of a class with a Taskable interface + * @return $scannerInstance An instance of a Scanner class + */ + public static final function createScannerInstance (Taskable $taskInstance) { + // Get new factory instance (NOTE: not used anywhere below in this method) + $factoryInstance = new ScannerObjectFactory(); + + // Get task handler instance + $handlerInstance = Registry::getRegistry()->getInstance('task_handler'); + + // Get scanner type for given task instance by searching it in task handler + $scannerType = $handlerInstance->searchTask($taskInstance); + + // Only let registered tasks pass + assert(!empty($scannerType)); + + // Is a scanner of this type already in the registry? + if (Registry::getRegistry()->instanceExists('scanner_' . $scannerType)) { + // Reuse the scanner instance from the registry + $scannerInstance = Registry::getRegistry()->getInstance('scanner_' . $scannerType); + } else { + // Create the scanner instance by its configured name + $scannerInstance = ObjectFactory::createObjectByConfiguredName($scannerType); + + // Add it to the registry + Registry::getRegistry()->addInstance('scanner_' . 
$scannerType, $scannerInstance); + } + + // Return the instance + return $scannerInstance; + } +} + +// [EOF] +?> diff --git a/application/hub/main/factories/source/url/class_UrlSourceObjectFactory.php b/application/hub/main/factories/source/url/class_UrlSourceObjectFactory.php index 76ac83135..42b605629 100644 --- a/application/hub/main/factories/source/url/class_UrlSourceObjectFactory.php +++ b/application/hub/main/factories/source/url/class_UrlSourceObjectFactory.php @@ -45,25 +45,22 @@ class UrlSourceObjectFactory extends ObjectFactory { // Get task handler instance $handlerInstance = Registry::getRegistry()->getInstance('task_handler'); - // The default node-mode is from our configuration + // Get source type by looking for given task instance in task handler $sourceType = $handlerInstance->searchTask($taskInstance); // Only let registered tasks pass assert(!empty($sourceType)); // If there is no handler? - if (Registry::getRegistry()->instanceExists('node_' . $sourceType)) { + if (Registry::getRegistry()->instanceExists('source_' . $sourceType)) { // Get handler from registry - $sourceInstance = Registry::getRegistry()->getInstance('node_' . $sourceType); + $sourceInstance = Registry::getRegistry()->getInstance('source_' . $sourceType); } else { - // Now convert the source type into a class name - $className = $factoryInstance->convertToClassName($sourceType); - - // Get the node instance - $sourceInstance = ObjectFactory::createObjectByName($className); + // Get the source instance + $sourceInstance = ObjectFactory::createObjectByConfiguredName($sourceType . '_class'); // Add it to the registry - Registry::getRegistry()->addInstance('node_' . $sourceType, $sourceInstance); + Registry::getRegistry()->addInstance('source_' . 
$sourceType, $sourceInstance); } // Return the instance diff --git a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php index 4ba0ed0b1..46a84ffef 100644 --- a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php +++ b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php @@ -117,6 +117,10 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_url_source_found_rss_task_class'); $handlerInstance->registerTask('crawler_url_source_found_rss', $taskInstance); + // 14) Uploaded list scanner (checks for wanted files) + $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_scanner_task_class'); + $handlerInstance->registerTask('crawler_uploaded_list_scanner', $taskInstance); + // Put the task handler in registry Registry::getRegistry()->addInstance('task_handler', $handlerInstance); } diff --git a/application/hub/main/scanner/.htaccess b/application/hub/main/scanner/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/scanner/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/scanner/class_ b/application/hub/main/scanner/class_ new file mode 100644 index 000000000..6b8ba17d6 --- /dev/null +++ b/application/hub/main/scanner/class_ @@ -0,0 +1,60 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class ???Scanner extends BaseScanner implements Scanner, Registerable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $scannerInstance An instance of a Scanner class + */ + public final static function create???Scanner () { + // Get new instance + $scannerInstance = new ???Scanner(); + + // Return the prepared instance + return $scannerInstance; + } + + /** + * Runs the scanner (please no loops here) + * + * @return void + * @todo 0% done + */ + public function execute () { + $this->partialStub('Please implement this method.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/scanner/class_BaseScanner.php b/application/hub/main/scanner/class_BaseScanner.php new file mode 100644 index 000000000..d56b387eb --- /dev/null +++ b/application/hub/main/scanner/class_BaseScanner.php @@ -0,0 +1,38 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2012 Hub Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class BaseScanner extends BaseHubSystem { + /** + * Protected constructor + * + * @param $className Name of the class + * @return void + */ + protected function __construct ($className) { + // Call parent constructor + parent::__construct($className); + } +} + +// [EOF] +?> diff --git a/application/hub/main/scanner/crawler/.htaccess b/application/hub/main/scanner/crawler/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/scanner/crawler/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/scanner/crawler/class_Crawler b/application/hub/main/scanner/crawler/class_Crawler new file mode 100644 index 000000000..d02f28cd4 --- /dev/null +++ b/application/hub/main/scanner/crawler/class_Crawler @@ -0,0 +1,60 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +class Crawler???Scanner extends BaseScanner implements Scanner, Registerable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $scannerInstance An instance of a Scanner class + */ + public final static function createCrawler???Scanner () { + // Get new instance + $scannerInstance = new Crawler???Scanner(); + + // Return the prepared instance + return $scannerInstance; + } + + /** + * Runs the scanner (please no loops here) + * + * @return void + * @todo 0% done + */ + public function execute () { + $this->partialStub('Please implement this method.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/scanner/crawler/uploaded_list/.htaccess b/application/hub/main/scanner/crawler/uploaded_list/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/scanner/crawler/uploaded_list/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php b/application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php new file mode 100644 index 000000000..1dfac0f40 --- /dev/null +++ b/application/hub/main/scanner/crawler/uploaded_list/class_CrawlerUploadedListScanner.php @@ -0,0 +1,60 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerUploadedListScanner extends BaseScanner implements Scanner, Registerable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $scannerInstance An instance of a Scanner class + */ + public final static function createCrawlerUploadedListScanner () { + // Get new instance + $scannerInstance = new CrawlerUploadedListScanner(); + + // Return the prepared instance + return $scannerInstance; + } + + /** + * Runs the scanner (please no loops here) + * + * @return void + * @todo 0% done + */ + public function execute () { + $this->partialStub('Please implement this method.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/source/urls/class_CrawlerUrlSource b/application/hub/main/source/urls/class_CrawlerUrlSource index eb9411024..c330af0a4 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSource +++ b/application/hub/main/source/urls/class_CrawlerUrlSource @@ -44,6 +44,16 @@ class CrawlerUrlSource??? extends BaseSource implements UrlSource, Registerable // Return the prepared instance return $sourceInstance; } + + /** + * Processes entries in the stack. 
+ * + * @return void + * @todo 0% done + */ + public function processStack () { + $this->partialStub('Please implement this method.'); + } } // [EOF] diff --git a/application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php b/application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php index 6abf586a6..08a4595f6 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php +++ b/application/hub/main/source/urls/class_CrawlerUrlSourceFoundRss.php @@ -44,6 +44,16 @@ class CrawlerUrlSourceFoundRss extends BaseSource implements UrlSource, Register // Return the prepared instance return $sourceInstance; } + + /** + * Processes entries in the stack. + * + * @return void + * @todo 0% done + */ + public function processStack () { + $this->partialStub('Please implement this method.'); + } } // [EOF] diff --git a/application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php b/application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php index f2b5e1557..67279be04 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php +++ b/application/hub/main/source/urls/class_CrawlerUrlSourceLocalStart.php @@ -44,6 +44,16 @@ class CrawlerUrlSourceLocalStart extends BaseSource implements UrlSource, Regist // Return the prepared instance return $sourceInstance; } + + /** + * Processes entries in the stack. 
+ * + * @return void + * @todo 0% done + */ + public function processStack () { + $this->partialStub('Please implement this method.'); + } } // [EOF] diff --git a/application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php b/application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php index c0f880ee7..7378c0c0b 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php +++ b/application/hub/main/source/urls/class_CrawlerUrlSourceRssStart.php @@ -44,6 +44,16 @@ class CrawlerUrlSourceRssStart extends BaseSource implements UrlSource, Register // Return the prepared instance return $sourceInstance; } + + /** + * Processes entries in the stack. + * + * @return void + * @todo 0% done + */ + public function processStack () { + $this->partialStub('Please implement this method.'); + } } // [EOF] diff --git a/application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php b/application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php index ecbbe5938..5ef26cdaf 100644 --- a/application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php +++ b/application/hub/main/source/urls/class_CrawlerUrlSourceUploadedList.php @@ -44,6 +44,16 @@ class CrawlerUrlSourceUploadedList extends BaseSource implements UrlSource, Regi // Return the prepared instance return $sourceInstance; } + + /** + * Processes entries in the stack. 
+ * + * @return void + * @todo 0% done + */ + public function processStack () { + $this->partialStub('Please implement this method.'); + } } // [EOF] diff --git a/application/hub/main/tasks/crawler/scanner/.htaccess b/application/hub/main/tasks/crawler/scanner/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/tasks/crawler/scanner/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/tasks/crawler/scanner/class_Crawler b/application/hub/main/tasks/crawler/scanner/class_Crawler new file mode 100644 index 000000000..359c41d45 --- /dev/null +++ b/application/hub/main/tasks/crawler/scanner/class_Crawler @@ -0,0 +1,72 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +class Crawler???ScannerTask extends BaseTask implements Taskable, Visitable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $taskInstance An instance of a Visitable class + */ + public final static function createCrawler???ScannerTask () { + // Get new instance + $taskInstance = new Crawler???ScannerTask(); + + // Return the prepared instance + return $taskInstance; + } + + /** + * Accepts the visitor to process the visitor + * + * @param $visitorInstance An instance of a Visitor class + * @return void + * @todo Maybe visit some sub-objects + */ + public function accept (Visitor $visitorInstance) { + // Visit this task + $visitorInstance->visitTask($this); + } + + /** + * Executes the task + * + * @return void + */ + public function executeTask () { + // Call ScannerObjectFactory for a scanner instance and execute it + ScannerObjectFactory::createScannerInstance($this)->execute(); + } +} + +// [EOF] +?> diff --git a/application/hub/main/tasks/crawler/scanner/class_CrawlerUploadedListScannerTask.php b/application/hub/main/tasks/crawler/scanner/class_CrawlerUploadedListScannerTask.php new file mode 100644 index 000000000..cdb3c0d00 --- /dev/null +++ b/application/hub/main/tasks/crawler/scanner/class_CrawlerUploadedListScannerTask.php @@ -0,0 +1,72 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerUploadedListScannerTask extends BaseTask implements Taskable, Visitable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $taskInstance An instance of a Visitable class + */ + public final static function createCrawlerUploadedListScannerTask () { + // Get new instance + $taskInstance = new CrawlerUploadedListScannerTask(); + + // Return the prepared instance + return $taskInstance; + } + + /** + * Accepts the visitor to process the visitor + * + * @param $visitorInstance An instance of a Visitor class + * @return void + * @todo Maybe visit some sub-objects + */ + public function accept (Visitor $visitorInstance) { + // Visit this task + $visitorInstance->visitTask($this); + } + + /** + * Executes the task + * + * @return void + */ + public function executeTask () { + // Call factory for a scanner instance and execute it + ScannerObjectFactory::createScannerInstance($this)->execute(); + } +} + +// [EOF] +?> diff --git a/docs/TODOs.txt b/docs/TODOs.txt index 1b260bfa2..8ef8fa236 100644 --- a/docs/TODOs.txt +++ b/docs/TODOs.txt @@ -14,7 +14,7 @@ ./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:122: * @todo Add minimum/maximum age limitations ./application/hub/interfaces/wrapper/class_NodeDhtWrapper.php:132: * @todo Add timestamp to dataset instance ./application/hub/main/chains/class_PackageFilterChain.php:54: * @todo This may be slow if a message with a lot tags arrived 
-./application/hub/main/class_BaseHubSystem.php:528: // @TODO On some systems it is 134, on some 107? +./application/hub/main/class_BaseHubSystem.php:576: // @TODO On some systems it is 134, on some 107? ./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:107: * @todo Should we add some more filters? ./application/hub/main/commands/console/class_HubConsoleAptProxyCommand.php:58: * @todo Try to create a AptProxyActivationTask or so ./application/hub/main/commands/console/class_HubConsoleChatCommand.php:107: * @todo Should we add some more filters? @@ -27,16 +27,17 @@ ./application/hub/main/commands/console/class_HubConsoleMainCommand.php:58: * @todo Try to create a HubActivationTask or so ./application/hub/main/commands/console/class_HubConsoleMinerCommand.php:107: * @todo Should we add some more filters? ./application/hub/main/commands/console/class_HubConsoleMinerCommand.php:58: * @todo Try to create a MinerActivationTask or so +./application/hub/main/crawler/class_BaseNodeCrawler.php:59: * @todo 0% done ./application/hub/main/cruncher/class_BaseHubCruncher.php:200: * @todo Try to make this method more generic so we can move it in BaseFrameworkSystem ./application/hub/main/cruncher/mcrypt/class_HubMcryptCruncher.php:108: * @todo Implement this method ./application/hub/main/cruncher/mcrypt/class_HubMcryptCruncher.php:138: * @todo 0% done ./application/hub/main/cruncher/mcrypt/class_HubMcryptCruncher.php:98: // @TODO Implement this method -./application/hub/main/dht/class_BaseDht.php:129: * @todo Add minimum/maximum age limitations -./application/hub/main/dht/class_BaseDht.php:163: // @TODO Maybe add more small checks? -./application/hub/main/dht/class_BaseDht.php:205: * @todo Find out if loadDescriptorXml() can be called only once to avoid a lot methods working. 
-./application/hub/main/dht/class_BaseDht.php:245: * @todo 0% done -./application/hub/main/dht/class_BaseDht.php:256: * @todo Switch flag 'accept_bootstrap' -./application/hub/main/dht/class_BaseDht.php:89: * @todo Find more to do here +./application/hub/main/dht/class_BaseDht.php:126: * @todo Add minimum/maximum age limitations +./application/hub/main/dht/class_BaseDht.php:160: // @TODO Maybe add more small checks? +./application/hub/main/dht/class_BaseDht.php:202: * @todo Find out if loadDescriptorXml() can be called only once to avoid a lot methods working. +./application/hub/main/dht/class_BaseDht.php:242: * @todo 0% done +./application/hub/main/dht/class_BaseDht.php:253: * @todo Switch flag 'accept_bootstrap' +./application/hub/main/dht/class_BaseDht.php:86: * @todo Find more to do here ./application/hub/main/dht/node/class_NodeDhtFacade.php:61: * @todo Does this data need to be enriched with more meta data? ./application/hub/main/discovery/package/class_PackageRecipientDiscovery.php:86: * @todo Add some validation of recipient field, e.g. ip:port is found ./application/hub/main/discovery/package/class_PackageRecipientDiscovery.php:87: * @todo The if() does only check for TCP, not UDP, e.g. 
try to get a $handlerInstance here @@ -74,7 +75,7 @@ ./application/hub/main/filter/shutdown/node/class_NodeShutdownTaskHandlerFilter.php:55: * @todo 0% done ./application/hub/main/filter/task/apt-proxy/class_AptProxyTaskHandlerInitializerFilter.php:55: * @todo 5% done ./application/hub/main/filter/task/chat/class_ChatTaskHandlerInitializerFilter.php:55: * @todo 5% done -./application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php:55: * @todo 5% done +./application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php:55: * @todo 10% done ./application/hub/main/filter/task/cruncher/class_CruncherTaskHandlerInitializerFilter.php:55: * @todo 5% done ./application/hub/main/filter/task/miner/class_MinerTaskHandlerInitializerFilter.php:55: * @todo 5% done ./application/hub/main/filter/task/node/class_NodeTaskHandlerInitializerFilter.php:55: * @todo Maybe some more tasks needs to be added? @@ -124,13 +125,13 @@ ./application/hub/main/miner/class_BaseHubMiner.php:200: * @todo Try to make this method more generic so we can move it in BaseFrameworkSystem ./application/hub/main/nodes/boot/class_HubBootNode.php:110: // @TODO Add some filters here ./application/hub/main/nodes/boot/class_HubBootNode.php:58: * @todo add some more special bootstrap things for this boot node -./application/hub/main/nodes/class_BaseHubNode.php:120: * @todo Make this code more generic and move it to CryptoHelper or -./application/hub/main/nodes/class_BaseHubNode.php:417: * @todo Try to make this method more generic so we can move it in BaseFrameworkSystem -./application/hub/main/nodes/class_BaseHubNode.php:457: * @todo Change the first if() block to check for a specific state -./application/hub/main/nodes/class_BaseHubNode.php:663: * @todo Add checking if this node has been announced to the sender node -./application/hub/main/nodes/class_BaseHubNode.php:683: * @todo Add checking if this node has been announced to the sender node 
-./application/hub/main/nodes/class_BaseHubNode.php:767: * @todo Find more to do here -./application/hub/main/nodes/class_BaseHubNode.php:780: * @todo Handle thrown exception +./application/hub/main/nodes/class_BaseHubNode.php:117: * @todo Make this code more generic and move it to CryptoHelper or +./application/hub/main/nodes/class_BaseHubNode.php:414: * @todo Try to make this method more generic so we can move it in BaseFrameworkSystem +./application/hub/main/nodes/class_BaseHubNode.php:454: * @todo Change the first if() block to check for a specific state +./application/hub/main/nodes/class_BaseHubNode.php:660: * @todo Add checking if this node has been announced to the sender node +./application/hub/main/nodes/class_BaseHubNode.php:680: * @todo Add checking if this node has been announced to the sender node +./application/hub/main/nodes/class_BaseHubNode.php:764: * @todo Find more to do here +./application/hub/main/nodes/class_BaseHubNode.php:777: * @todo Handle thrown exception ./application/hub/main/nodes/list/class_HubListNode.php:58: * @todo Implement more bootstrap steps ./application/hub/main/nodes/list/class_HubListNode.php:79: // @TODO Add some filters here ./application/hub/main/nodes/list/class_HubListNode.php:88: * @todo 0% done @@ -157,9 +158,13 @@ ./application/hub/main/producer/cruncher/work_units/class_CruncherTestUnitProducer.php:79: * @todo ~60% done ./application/hub/main/producer/cruncher/work_units/class_CruncherTestUnitProducer.php:88: // @TODO Unfinished work here ./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:53: * @todo 0% done -./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:64: * @todo 0% done +./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:63: * @todo 0% done +./application/hub/main/producer/miner/blocks/class_MinerTestGenesisBlockProducer.php:74: * @todo 0% done 
./application/hub/main/registry/socket/class_SocketRegistry.php:71: // @TODO Tested again base class, rewrite it to a generic interface! ./application/hub/main/resolver/state/peer/class_PeerStateResolver.php:59: * @todo ~30% done +./application/hub/main/states/crawler/active/class_CrawlerActiveState.php:60: * @todo 0% done +./application/hub/main/states/crawler/booting/class_CrawlerBootingState.php:60: * @todo 0% done +./application/hub/main/states/crawler/init/class_CrawlerInitState.php:70: * @todo ~30% done ./application/hub/main/states/dht/class_BaseDhtState.php:10: * @todo Create generic DHT interface ./application/hub/main/states/node/active/class_NodeActiveState.php:75: * @todo We might want to move some calls to this method to fill it with life ./application/hub/main/states/node/init/class_NodeInitState.php:60: * @todo We might want to move some calls to this method to fill it with life @@ -168,6 +173,28 @@ ./application/hub/main/streams/raw_data/input/class_RawDataInputStream.php:58: * @todo Do we need to do something more here? 
./application/hub/main/tasks/apt-proxy/class_AptProxyListenerTask.php:63: * @todo 0% ./application/hub/main/tasks/chat/class_ChatTelnetListenerTask.php:63: * @todo 0% +./application/hub/main/tasks/crawler/communicator/class_CrawlerNodeCommunicatorTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/document_parser/class_CrawlerDocumentParserTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/document_parser/class_CrawlerDocumentParserTask.php:64: * @todo 0% +./application/hub/main/tasks/crawler/mime_sniffer/class_CrawlerMimeSnifferTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/mime_sniffer/class_CrawlerMimeSnifferTask.php:64: * @todo 0% +./application/hub/main/tasks/crawler/ping/class_CrawlerPingTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/ping/class_CrawlerPingTask.php:64: * @todo 0% +./application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/publisher/class_CrawlerRemoteJobPublisherTask.php:64: * @todo 0% +./application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php:64: * @todo 0% +./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php:64: * @todo 0% +./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/url_getter/local/class_CrawlerLocalUrlGetterTask.php:64: * @todo 0% 
+./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/url_getter/remote/class_CrawlerRemoteUrlGetterTask.php:64: * @todo 0% +./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceFoundRssTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceLocalStartTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceRssStartTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/crawler/url_source/class_CrawlerUrlSourceUploadedListTask.php:53: * @todo Maybe visit some sub-objects +./application/hub/main/tasks/miner/communicator/class_MinerNodeCommunicatorTask.php:53: * @todo Maybe visit some sub-objects ./application/hub/main/tasks/network/class_NetworkPackageReaderTask.php:63: * @todo Also visit some sub-objects? ./application/hub/main/tasks/network/class_NetworkPackageWriterTask.php:59: * @todo Also visit some sub-objects? ./application/hub/main/tasks/node/chunks/class_NodeChunkAssemblerTask.php:59: * @todo Also visit some sub-objects? -- 2.39.5