From 01bec5a55a74224e7d97bfb64a424e6b10abbec6 Mon Sep 17 00:00:00 2001 From: Roland Haeder Date: Sat, 3 May 2014 22:06:16 +0200 Subject: [PATCH] Added initial files like classes and interface for crawler application. Signed-off-by: Roland Haeder --- application/hub/config.php | 31 +++++ .../hub/interfaces/chat/class_Chatter.php | 2 +- application/hub/interfaces/crawler/.htaccess | 1 + .../hub/interfaces/crawler/class_Crawler.php | 69 +++++++++ application/hub/main/chat/class_ | 2 +- .../class_HubConsoleCrawlerCommand.php | 131 ++++++++++++++++++ .../class_HubConsoleCrawlerController.php | 131 ++++++++++++++++++ application/hub/main/crawler/.htaccess | 1 + application/hub/main/crawler/class_ | 82 +++++++++++ .../main/crawler/class_BaseNodeCrawler.php | 62 +++++++++ .../hub/main/crawler/console/.htaccess | 1 + .../console/class_NodeConsoleCrawler.php | 93 +++++++++++++ .../main/filter/bootstrap/crawler/.htaccess | 1 + .../bootstrap/crawler/class_CrawlerBootstrap | 66 +++++++++ ...awlerBootstrapExtraBootstrappingFilter.php | 66 +++++++++ ...rawlerBootstrapGenericActivationFilter.php | 69 +++++++++ .../main/filter/class_BaseCrawlerFilter.php | 38 +++++ application/hub/main/filter/crawler/.htaccess | 1 + .../class_CrawlerInitializationFilter.php | 75 ++++++++++ .../class_CrawlerPhpRequirementsFilter.php | 78 +++++++++++ .../class_CrawlerWelcomeTeaserFilter.php | 67 +++++++++ .../main/filter/shutdown/crawler/.htaccess | 1 + .../crawler/class_CrawlerShutdownFilter.php | 68 +++++++++ .../hub/main/filter/task/crawler/.htaccess | 1 + ...ss_CrawlerTaskHandlerInitializerFilter.php | 78 +++++++++++ docs/README | 13 +- 26 files changed, 1224 insertions(+), 4 deletions(-) create mode 100644 application/hub/interfaces/crawler/.htaccess create mode 100644 application/hub/interfaces/crawler/class_Crawler.php create mode 100644 application/hub/main/commands/console/class_HubConsoleCrawlerCommand.php create mode 100644 
application/hub/main/controller/console/class_HubConsoleCrawlerController.php create mode 100644 application/hub/main/crawler/.htaccess create mode 100644 application/hub/main/crawler/class_ create mode 100644 application/hub/main/crawler/class_BaseNodeCrawler.php create mode 100644 application/hub/main/crawler/console/.htaccess create mode 100644 application/hub/main/crawler/console/class_NodeConsoleCrawler.php create mode 100644 application/hub/main/filter/bootstrap/crawler/.htaccess create mode 100644 application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrap create mode 100644 application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrapExtraBootstrappingFilter.php create mode 100644 application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrapGenericActivationFilter.php create mode 100644 application/hub/main/filter/class_BaseCrawlerFilter.php create mode 100644 application/hub/main/filter/crawler/.htaccess create mode 100644 application/hub/main/filter/crawler/class_CrawlerInitializationFilter.php create mode 100644 application/hub/main/filter/crawler/class_CrawlerPhpRequirementsFilter.php create mode 100644 application/hub/main/filter/crawler/class_CrawlerWelcomeTeaserFilter.php create mode 100644 application/hub/main/filter/shutdown/crawler/.htaccess create mode 100644 application/hub/main/filter/shutdown/crawler/class_CrawlerShutdownFilter.php create mode 100644 application/hub/main/filter/task/crawler/.htaccess create mode 100644 application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php diff --git a/application/hub/config.php b/application/hub/config.php index 2620a75bf..0d47627ab 100644 --- a/application/hub/config.php +++ b/application/hub/config.php @@ -1174,6 +1174,37 @@ $cfg->setConfigEntry('task_chat_telnet_listener_interval_delay', 10); // CFG: TASK-CHAT-TELNET-LISTENER-MAX-RUNS $cfg->setConfigEntry('task_chat_telnet_listener_max_runs', 0); 
+/////////////////////////////////////////////////////////////////////////////// +// Crawler Configuration +/////////////////////////////////////////////////////////////////////////////// + +// CFG: HUBCONSOLE-CMD-CRAWLER-RESOLVER-CLASS +$cfg->setConfigEntry('hubconsole_cmd_crawler_resolver_class', 'HubConsoleCommandResolver'); + +// CFG: CRAWLER-PHP-REQUIREMENTS-FILTER +$cfg->setConfigEntry('crawler_php_requirements_filter', 'CrawlerPhpRequirementsFilter'); + +// CFG: CRAWLER-INITIALIZER-FILTER +$cfg->setConfigEntry('crawler_initializer_filter', 'CrawlerInitializationFilter'); + +// CFG: NODE-CONSOLE-CRAWLER-CLASS +$cfg->setConfigEntry('node_console_crawler_class', 'NodeConsoleCrawler'); + +// CFG: CRAWLER-WELCOME-TEASER-FILTER +$cfg->setConfigEntry('crawler_welcome_teaser_filter', 'CrawlerWelcomeTeaserFilter'); + +// CFG: CRAWLER-BOOTSTRAP-TASK-HANDLER-INITIALIZER-FILTER +$cfg->setConfigEntry('crawler_bootstrap_task_handler_initializer_filter', 'CrawlerTaskHandlerInitializerFilter'); + +// CFG: CRAWLER-BOOTSTRAP-EXTRA-BOOTSTRAPPING-FILTER +$cfg->setConfigEntry('crawler_bootstrap_extra_bootstrapping_filter', 'CrawlerBootstrapExtraBootstrappingFilter'); + +// CFG: CRAWLER-BOOTSTRAP-GENERIC-ACTIVATION-FILTER +$cfg->setConfigEntry('crawler_bootstrap_generic_activation_filter', 'CrawlerBootstrapGenericActivationFilter'); + +// CFG: CRAWLER-SHUTDOWN-FILTER +$cfg->setConfigEntry('crawler_shutdown_filter', 'CrawlerShutdownFilter'); + /////////////////////////////////////////////////////////////////////////////// // HTTP Configuration /////////////////////////////////////////////////////////////////////////////// diff --git a/application/hub/interfaces/chat/class_Chatter.php b/application/hub/interfaces/chat/class_Chatter.php index ae99c7af2..fe7734a39 100644 --- a/application/hub/interfaces/chat/class_Chatter.php +++ b/application/hub/interfaces/chat/class_Chatter.php @@ -4,7 +4,7 @@ * * @author Roland Haeder * @version 0.0.0 - * @copyright Copyright (c) 2007, 2008 Roland Haeder, 
2009 - 2012 Hub Developer Team + * @copyright Copyright (c) 2012 Chat Developer Team * @license GNU GPL 3.0 or any newer version * @link http://www.shipsimu.org * @todo We need to find a better name for this interface diff --git a/application/hub/interfaces/crawler/.htaccess b/application/hub/interfaces/crawler/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/interfaces/crawler/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/interfaces/crawler/class_Crawler.php b/application/hub/interfaces/crawler/class_Crawler.php new file mode 100644 index 000000000..1c4103f46 --- /dev/null +++ b/application/hub/interfaces/crawler/class_Crawler.php @@ -0,0 +1,69 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * @todo We need to find a better name for this interface + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +interface Crawler extends FrameworkInterface { + /** + * Method to "bootstrap" the crawler. This step does also apply provided + * command-line arguments stored in the request instance. You should now + * get it from calling $this->getRequestInstance(). + * + * @return void + */ + function doBootstrapping (); + + /** + * Outputs the console teaser. This should only be executed on startup or + * full restarts. 
This method generates some space around the teaser. + * + * @return void + */ + function outputConsoleTeaser (); + + /** + * Add some crawler-specific filters + * + * @param $controllerInstance An object of a Controller instance + * @param $responseInstance An object of a Responseable instance + * @return void + */ + function addExtraFilters (Controller $controllerInstance, Responseable $responseInstance); + + /** + * Enables/disables the crawler (just sets a flag) + * + * @param $isActive Whether the crawler should be active (default: TRUE) + * @return void + */ + function enableIsActive ($isActive = TRUE); + + /** + * Determines whether the crawler is active + * + * @return $isActive Whether the crawler is active + */ + function isActive (); +} + +// [EOF] +?> diff --git a/application/hub/main/chat/class_ b/application/hub/main/chat/class_ index ac7a45fb4..c25b448e3 100644 --- a/application/hub/main/chat/class_ +++ b/application/hub/main/chat/class_ @@ -4,7 +4,7 @@ * * @author Roland Haeder * @version 0.0.0 - * @copyright Copyright (c) 2011 - 2012 Cruncher Developer Team + * @copyright Copyright (c) 2012 Chat Developer Team * @license GNU GPL 3.0 or any newer version * @link http://www.ship-simu.org * diff --git a/application/hub/main/commands/console/class_HubConsoleCrawlerCommand.php b/application/hub/main/commands/console/class_HubConsoleCrawlerCommand.php new file mode 100644 index 000000000..1f7d1c704 --- /dev/null +++ b/application/hub/main/commands/console/class_HubConsoleCrawlerCommand.php @@ -0,0 +1,131 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +class HubConsoleCrawlerCommand extends BaseCommand implements Commandable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @param $resolverInstance An instance of a command resolver class + * @return $commandInstance A prepared instance of this command class + */ + public static final function createHubConsoleCrawlerCommand (CommandResolver $resolverInstance) { + // Get new instance + $commandInstance = new HubConsoleCrawlerCommand(); + + // Set the resolver instance + $commandInstance->setResolverInstance($resolverInstance); + + // Return the prepared instance + return $commandInstance; + } + + /** + * Executes the given command with given request and response objects + * + * @param $requestInstance An instance of a class with an Requestable interface + * @param $responseInstance An instance of a class with an Responseable interface + * @return void + * @todo Try to create a CrawlerActivationTask or so + */ + public function execute (Requestable $requestInstance, Responseable $responseInstance) { + // Get a registry and the application instance from it + $applicationInstance = Registry::getRegistry()->getInstance('app'); + + /* + * ----------------------- Bootstrapping phase ------------------------ + * Try to bootstrap the crawler and pass the request instance to it for + * extra arguments which mostly override config entries or enable special + * features within the hub (none is ready at this development stage) + */ + 
self::createDebugInstance(__CLASS__)->debugOutput('BOOTSTRAP: Beginning with bootstrap...'); + $applicationInstance->getControllerInstance()->executeBootstrapFilters($requestInstance, $responseInstance); + self::createDebugInstance(__CLASS__)->debugOutput('BOOTSTRAP: Bootstrap finished.'); + + // Get crawler instance + $crawlerInstance = Registry::getRegistry()->getInstance('crawler'); + + // Add some crawler-specific filters + $crawlerInstance->addExtraCrawlerFilters(); + + // Get task handler instance + $handlerInstance = Registry::getRegistry()->getInstance('task_handler'); + + // Debug message + self::createDebugInstance(__CLASS__)->debugOutput('MAIN: --- Entering main loop. ---'); + + /* + * ----------------------------- Main loop ---------------------------- + * This is the main loop. Queried calls should come back here very fast + * so the whole application runs on nice speed. This while-loop goes + * until the crawler is no longer active or all tasks are killed. + */ + while (($crawlerInstance->isActive()) && ($handlerInstance->hasTasksLeft())) { + // Handle all tasks here + $handlerInstance->handleTasks(); + } // END - while + + // Debug message + self::createDebugInstance(__CLASS__)->debugOutput('MAIN: --- Leaving main loop. ---'); + } + + /** + * Adds extra filters to the given controller instance + * + * @param $controllerInstance A controller instance + * @param $requestInstance An instance of a class with an Requestable interface + * @return void + * @todo Should we add some more filters? 
+ */ + public function addExtraFilters (Controller $controllerInstance, Requestable $requestInstance) { + // Add pre filters + $controllerInstance->addPreFilter(ObjectFactory::createObjectByConfiguredName('crawler_php_requirements_filter')); + $controllerInstance->addPreFilter(ObjectFactory::createObjectByConfiguredName('crawler_initializer_filter')); + $controllerInstance->addPreFilter(ObjectFactory::createObjectByConfiguredName('crawler_welcome_teaser_filter')); + + // Add bootstrap filters + $controllerInstance->addBootstrapFilter(ObjectFactory::createObjectByConfiguredName('crawler_bootstrap_task_handler_initializer_filter')); + $controllerInstance->addBootstrapFilter(ObjectFactory::createObjectByConfiguredName('crawler_bootstrap_extra_bootstrapping_filter')); + + // This is the last generic bootstrap filter + $controllerInstance->addBootstrapFilter(ObjectFactory::createObjectByConfiguredName('crawler_bootstrap_generic_activation_filter')); + + // Add shutdown filters + //$controllerInstance->addShutdownFilter(ObjectFactory::createObjectByConfiguredName('crawler_shutdown_task_handler_filter')); + + // This is the last generic shutdown filter + $controllerInstance->addShutdownFilter(ObjectFactory::createObjectByConfiguredName('crawler_shutdown_filter')); + } +} + +// [EOF] +?> diff --git a/application/hub/main/controller/console/class_HubConsoleCrawlerController.php b/application/hub/main/controller/console/class_HubConsoleCrawlerController.php new file mode 100644 index 000000000..2a514e7d8 --- /dev/null +++ b/application/hub/main/controller/console/class_HubConsoleCrawlerController.php @@ -0,0 +1,131 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of 
the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class HubConsoleCrawlerController extends BaseController implements Controller { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + + // Init additional filter chains + foreach (array('bootstrap','shutdown') as $filterChain) { + $this->initFilterChain($filterChain); + } // END - foreach + } + + /** + * Creates an instance of this class + * + * @param $resolverInstance An instance of a command resolver class + * @return $controllerInstance A prepared instance of this class + */ + public static final function createHubConsoleCrawlerController (CommandResolver $resolverInstance) { + // Create the instance + $controllerInstance = new HubConsoleCrawlerController(); + + // Set the command resolver + $controllerInstance->setResolverInstance($resolverInstance); + + // Return the prepared instance + return $controllerInstance; + } + + /** + * Handles the given request and response + * + * @param $requestInstance An instance of a request class + * @param $responseInstance An instance of a response class + * @return void + */ + public function handleRequest (Requestable $requestInstance, Responseable $responseInstance) { + // Get the command instance from the resolver by sending a request instance to the resolver + $commandInstance = $this->getResolverInstance()->resolveCommandByRequest($requestInstance); + + // Add more filters by the command + $commandInstance->addExtraFilters($this, $requestInstance); + + // Run the pre filters + 
$this->executePreFilters($requestInstance, $responseInstance); + + // This request was valid! :-D + $requestInstance->requestIsValid(); + + // Execute the command + $commandInstance->execute($requestInstance, $responseInstance); + + // Run the post filters + $this->executePostFilters($requestInstance, $responseInstance); + + // Flush the response out + $responseInstance->flushBuffer(); + } + + /** + * Add a bootstrap filter + * + * @param $filterInstance A Filterable class + * @return void + */ + public function addBootstrapFilter (Filterable $filterInstance) { + $this->addFilter('bootstrap', $filterInstance); + } + + /** + * Executes all bootstrap filters + * + * @param $requestInstance A Requestable class + * @param $responseInstance A Responseable class + * @return void + */ + public function executeBootstrapFilters (Requestable $requestInstance, Responseable $responseInstance) { + $this->executeFilters('bootstrap', $requestInstance, $responseInstance); + } + + /** + * Add a shutdown filter + * + * @param $filterInstance A Filterable class + * @return void + */ + public function addShutdownFilter (Filterable $filterInstance) { + $this->addFilter('shutdown', $filterInstance); + } + + /** + * Executes all shutdown filters + * + * @param $requestInstance A Requestable class + * @param $responseInstance A Responseable class + * @return void + */ + public function executeShutdownFilters (Requestable $requestInstance, Responseable $responseInstance) { + $this->executeFilters('shutdown', $requestInstance, $responseInstance); + } +} + +// [EOF] +?> diff --git a/application/hub/main/crawler/.htaccess b/application/hub/main/crawler/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/crawler/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/crawler/class_ b/application/hub/main/crawler/class_ new file mode 100644 index 000000000..189379e51 --- /dev/null +++ b/application/hub/main/crawler/class_ @@ -0,0 
+1,82 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class Node???Crawler extends BaseNodeCrawler implements Crawler { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $crawlerInstance An instance of a Crawler class + */ + public final static function createNode???Crawler () { + // Get new instance + $crawlerInstance = new Node???Crawler(); + + // Return the prepared instance + return $crawlerInstance; + } + + /** + * Method to "bootstrap" the crawler. This step does also apply provided + * command-line arguments stored in the request instance. You should now + * get it from calling $this->getRequestInstance(). + * + * @return void + */ + public function doBootstrapping () { + $this->partialStub('Please implement this method.'); + } + + /** + * Outputs the console teaser. This should only be executed on startup or + * full restarts. This method generates some space around the teaser. 
+ * + * @return void + */ + public function outputConsoleTeaser () { + $this->partialStub('Please implement this method.'); + } + + /** + * Add some crawler-specific filters + * + * @param $controllerInstance An object of a Controller instance + * @param $responseInstance An object of a Responseable instance + * @return void + */ + public function addExtraFilters (Controller $controllerInstance, Responseable $responseInstance) { + $this->partialStub('Please implement this method.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/crawler/class_BaseNodeCrawler.php b/application/hub/main/crawler/class_BaseNodeCrawler.php new file mode 100644 index 000000000..c515456c6 --- /dev/null +++ b/application/hub/main/crawler/class_BaseNodeCrawler.php @@ -0,0 +1,62 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +class BaseNodeCrawler extends BaseHubSystem { + /** + * Whether this Crawler is active + */ + private $isActive = FALSE; + + /** + * Protected constructor + * + * @param $className Name of the class + * @return void + */ + protected function __construct ($className) { + // Call parent constructor + parent::__construct($className); + } + + /** + * Enables/disables the crawler (just sets a flag) + * + * @param $isActive Whether the crawler should be active (cast to bool, default: TRUE) + * @return void + */ + public final function enableIsActive ($isActive = TRUE) { + $this->isActive = (bool) $isActive; + } + + /** + * Determines whether the crawler is active + * + * @return $isActive Whether the crawler is active + */ + public final function isActive () { + return $this->isActive; + } +} + +// [EOF] +?> diff --git a/application/hub/main/crawler/console/.htaccess b/application/hub/main/crawler/console/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/crawler/console/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/crawler/console/class_NodeConsoleCrawler.php b/application/hub/main/crawler/console/class_NodeConsoleCrawler.php new file mode 100644 index 000000000..d94216b08 --- /dev/null +++ b/application/hub/main/crawler/console/class_NodeConsoleCrawler.php @@ -0,0 +1,93 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class NodeConsoleCrawler extends BaseNodeCrawler implements Crawler, Registerable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this class + * + * @return $crawlerInstance An instance of a Crawler class + */ + public final static function createNodeConsoleCrawler () { + // Get new instance + $crawlerInstance = new NodeConsoleCrawler(); + + // Return the prepared instance + return $crawlerInstance; + } + + /** + * Method to "bootstrap" the crawler. This step does also apply provided + * command-line arguments stored in the request instance. You should now + * get it from calling $this->getRequestInstance(). + * + * @return void + */ + public function doBootstrapping () { + $this->partialStub('Please implement this method.'); + } + + /** + * Outputs the console teaser. This should only be executed on startup or + * full restarts. This method generates some space around the teaser. + * + * @return void + */ + public function outputConsoleTeaser () { + // Get the app instance (for shortening our code) + $app = $this->getApplicationInstance(); + + // Output all lines + self::createDebugInstance(__CLASS__)->debugOutput(' '); + self::createDebugInstance(__CLASS__)->debugOutput($app->getAppName() . ' v' . $app->getAppVersion() . 
' - A distributed crawler'); + self::createDebugInstance(__CLASS__)->debugOutput('Copyright (c) 2014 Crawler Developer Team'); + self::createDebugInstance(__CLASS__)->debugOutput(' '); + self::createDebugInstance(__CLASS__)->debugOutput('This program comes with ABSOLUTELY NO WARRANTY; for details see docs/COPYING.'); + self::createDebugInstance(__CLASS__)->debugOutput('This is free software, and you are welcome to redistribute it under certain'); + self::createDebugInstance(__CLASS__)->debugOutput('conditions; see docs/COPYING for details.'); + self::createDebugInstance(__CLASS__)->debugOutput(' '); + } + + /** + * Add some crawler-specific filters + * + * @param $controllerInstance An object of a Controller instance + * @param $responseInstance An object of a Responseable instance + * @return void + */ + public function addExtraFilters (Controller $controllerInstance, Responseable $responseInstance) { + $this->partialStub('Please implement this method.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/filter/bootstrap/crawler/.htaccess b/application/hub/main/filter/bootstrap/crawler/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/filter/bootstrap/crawler/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrap b/application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrap new file mode 100644 index 000000000..f0c3fcf4c --- /dev/null +++ b/application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrap @@ -0,0 +1,66 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.ship-simu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerBootstrap???Filter extends BaseCrawlerFilter implements Filterable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this filter class + * + * @return $filterInstance An instance of this filter class + */ + public final static function createCrawlerBootstrap???Filter () { + // Get a new instance + $filterInstance = new CrawlerBootstrap???Filter(); + + // Return the instance + return $filterInstance; + } + + /** + * Executes the filter with given request and response objects + * + * @param $requestInstance An instance of a class with an Requestable interface + * @param $responseInstance An instance of a class with an Responseable interface + * @return void + * @todo 0% done + */ + public function execute (Requestable $requestInstance, Responseable $responseInstance) { + // Get crawler instance + $crawlerInstance = Registry::getRegistry()->getInstance('crawler'); + + // Now do something + $this->partialStub('Please implement this step.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrapExtraBootstrappingFilter.php b/application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrapExtraBootstrappingFilter.php new file mode 100644 index 000000000..b4c6d8cac --- /dev/null +++ b/application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrapExtraBootstrappingFilter.php @@ -0,0 +1,66 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU 
GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerBootstrapExtraBootstrappingFilter extends BaseCrawlerFilter implements Filterable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this filter class + * + * @return $filterInstance An instance of this filter class + */ + public static final function createCrawlerBootstrapExtraBootstrappingFilter () { + // Get a new instance + $filterInstance = new CrawlerBootstrapExtraBootstrappingFilter(); + + // Return the instance + return $filterInstance; + } + + /** + * Executes the filter with given request and response objects + * + * @param $requestInstance An instance of a class with an Requestable interface + * @param $responseInstance An instance of a class with an Responseable interface + * @return void + * @throws FilterChainException If $crawlerInstance is null (no NullPointerException here) + */ + public function execute (Requestable $requestInstance, Responseable $responseInstance) { + // Get crawler instance + $crawlerInstance = Registry::getRegistry()->getInstance('crawler'); + + // Do some extra bootstrapping steps + $crawlerInstance->doBootstrapping(); + } +} + +// [EOF] +?> diff --git 
a/application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrapGenericActivationFilter.php b/application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrapGenericActivationFilter.php new file mode 100644 index 000000000..571ef3b9c --- /dev/null +++ b/application/hub/main/filter/bootstrap/crawler/class_CrawlerBootstrapGenericActivationFilter.php @@ -0,0 +1,69 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +class CrawlerBootstrapGenericActivationFilter extends BaseCrawlerFilter implements Filterable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this filter class + * + * @return $filterInstance An instance of this filter class + */ + public final static function createCrawlerBootstrapGenericActivationFilter () { + // Get a new instance + $filterInstance = new CrawlerBootstrapGenericActivationFilter(); + + // Return the instance + return $filterInstance; + } + + /** + * Executes the filter with given request and response objects + * + * @param $requestInstance An instance of a class with an Requestable interface + * @param $responseInstance An instance of a class with an Responseable interface + * @return void + * @todo Maybe we want to do something more here? + */ + public function execute (Requestable $requestInstance, Responseable $responseInstance) { + // Get a crawler instance + $crawlerInstance = Registry::getRegistry()->getInstance('crawler'); + + // Set the flag + $crawlerInstance->enableIsActive(); + + // Debug message + self::createDebugInstance(__CLASS__)->debugOutput('BOOTSTRAP: The crawler console has been activated.'); + } +} + +// [EOF] +?> diff --git a/application/hub/main/filter/class_BaseCrawlerFilter.php b/application/hub/main/filter/class_BaseCrawlerFilter.php new file mode 100644 index 000000000..3a964eefc --- /dev/null +++ b/application/hub/main/filter/class_BaseCrawlerFilter.php @@ -0,0 +1,38 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any
later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class BaseCrawlerFilter extends BaseHubFilter { + /** + * Protected constructor + * + * @param $className Real name of class + * @return void + */ + protected function __construct ($className) { + // Call parent constructor + parent::__construct($className); + } +} + +// [EOF] +?> diff --git a/application/hub/main/filter/crawler/.htaccess b/application/hub/main/filter/crawler/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/filter/crawler/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/filter/crawler/class_CrawlerInitializationFilter.php b/application/hub/main/filter/crawler/class_CrawlerInitializationFilter.php new file mode 100644 index 000000000..31ae09a9a --- /dev/null +++ b/application/hub/main/filter/crawler/class_CrawlerInitializationFilter.php @@ -0,0 +1,75 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerInitializationFilter extends BaseCrawlerFilter implements Filterable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this filter class + * + * @return $filterInstance An instance of this filter class + */ + public static final function createCrawlerInitializationFilter () { + // Get a new instance + $filterInstance = new CrawlerInitializationFilter(); + + // Return the instance + return $filterInstance; + } + + /** + * Executes the filter with given request and response objects + * + * @param $requestInstance An instance of a class with an Requestable interface + * @param $responseInstance An instance of a class with an Responseable interface + * @return void + * @todo 0% done + */ + public function execute (Requestable $requestInstance, Responseable $responseInstance) { + // Get an instance + $crawlerInstance = ObjectFactory::createObjectByConfiguredName('node_console_crawler_class'); + + // Get a registry + $applicationInstance = Registry::getRegistry()->getInstance('app'); + + // Set the app instance + $crawlerInstance->setApplicationInstance($applicationInstance); + + // Add crawler-specific filters + $crawlerInstance->addExtraFilters($applicationInstance->getControllerInstance(), $responseInstance); + + // Set the crawler instance in registry + Registry::getRegistry()->addInstance('crawler', $crawlerInstance); + } +} + +// [EOF] +?> diff --git a/application/hub/main/filter/crawler/class_CrawlerPhpRequirementsFilter.php b/application/hub/main/filter/crawler/class_CrawlerPhpRequirementsFilter.php new file mode 100644 index 000000000..f144b03e8 --- /dev/null +++ b/application/hub/main/filter/crawler/class_CrawlerPhpRequirementsFilter.php @@ -0,0 +1,78 @@ + + * @version 0.0.0 + * 
@copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerPhpRequirementsFilter extends BaseCrawlerFilter implements Filterable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this filter class + * + * @return $filterInstance An instance of this filter class + */ + public static final function createCrawlerPhpRequirementsFilter () { + // Get a new instance + $filterInstance = new CrawlerPhpRequirementsFilter(); + + // Return the instance + return $filterInstance; + } + + /** + * Executes the filter with given request and response objects + * + * @param $requestInstance An instance of a class with an Requestable interface + * @param $responseInstance An instance of a class with an Responseable interface + * @return void + * @throws FilterChainException If a required PHP function is not available + * @todo Add more tests and try to add an extra message to the thrown exception + */ + public function execute (Requestable $requestInstance, Responseable $responseInstance) { + // By default, the requirement check is passed and zero checks are failed + $checkPassed = TRUE; + $checksFailed = 0; + + // Socket
support is essential... + if (!function_exists('socket_create')) { + // Test failed + $checkPassed = FALSE; + $checksFailed++; + } // END -if + + // Are all tests passed? + if ($checkPassed === FALSE) { + // Throw an exception + throw new FilterChainException($this, self::FILTER_CHAIN_INTERCEPTED); + } // END - if + } +} + +// [EOF] +?> diff --git a/application/hub/main/filter/crawler/class_CrawlerWelcomeTeaserFilter.php b/application/hub/main/filter/crawler/class_CrawlerWelcomeTeaserFilter.php new file mode 100644 index 000000000..51579ddf9 --- /dev/null +++ b/application/hub/main/filter/crawler/class_CrawlerWelcomeTeaserFilter.php @@ -0,0 +1,67 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +class CrawlerWelcomeTeaserFilter extends BaseCrawlerFilter implements Filterable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this filter class + * + * @return $filterInstance An instance of this filter class + */ + public static final function createCrawlerWelcomeTeaserFilter () { + // Get a new instance + $filterInstance = new CrawlerWelcomeTeaserFilter(); + + // Return the instance + return $filterInstance; + } + + /** + * Executes the filter with given request and response objects + * + * @param $requestInstance An instance of a class with an Requestable interface + * @param $responseInstance An instance of a class with an Responseable interface + * @return void + * @throws FilterChainException If $crawlerInstance is null (no NullPointerException here) + * @todo Handle over the $responseInstance to outputConsoleTeaser() + */ + public function execute (Requestable $requestInstance, Responseable $responseInstance) { + // Get crawler instance + $crawlerInstance = Registry::getRegistry()->getInstance('crawler'); + + // Now output the teaser + $crawlerInstance->outputConsoleTeaser(); + } +} + +// [EOF] +?> diff --git a/application/hub/main/filter/shutdown/crawler/.htaccess b/application/hub/main/filter/shutdown/crawler/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/filter/shutdown/crawler/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/filter/shutdown/crawler/class_CrawlerShutdownFilter.php b/application/hub/main/filter/shutdown/crawler/class_CrawlerShutdownFilter.php new file mode 100644 index 000000000..e7241bfa5 --- /dev/null +++ b/application/hub/main/filter/shutdown/crawler/class_CrawlerShutdownFilter.php @@ -0,0 +1,68 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any
newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +class CrawlerShutdownFilter extends BaseCrawlerFilter implements Filterable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this filter class + * + * @return $filterInstance An instance of this filter class + */ + public static final function createCrawlerShutdownFilter () { + // Get a new instance + $filterInstance = new CrawlerShutdownFilter(); + + // Return the instance + return $filterInstance; + } + + /** + * Executes the filter with given request and response objects + * + * @param $requestInstance An instance of a class with an Requestable interface + * @param $responseInstance An instance of a class with an Responseable interface + * @return void + * @throws FilterChainException If $crawlerInstance is null (no NullPointerException please) + */ + public function execute (Requestable $requestInstance, Responseable $responseInstance) { + // Get crawler instance + $crawlerInstance = Registry::getRegistry()->getInstance('crawler'); + + // Shutdown the crawler.
This should be the last line + $crawlerInstance->doShutdown(); + } +} + +// [EOF] +?> diff --git a/application/hub/main/filter/task/crawler/.htaccess b/application/hub/main/filter/task/crawler/.htaccess new file mode 100644 index 000000000..3a4288278 --- /dev/null +++ b/application/hub/main/filter/task/crawler/.htaccess @@ -0,0 +1 @@ +Deny from all diff --git a/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php new file mode 100644 index 000000000..15a297733 --- /dev/null +++ b/application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php @@ -0,0 +1,78 @@ + + * @version 0.0.0 + * @copyright Copyright (c) 2014 Crawler Developer Team + * @license GNU GPL 3.0 or any newer version + * @link http://www.shipsimu.org + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements Filterable { + /** + * Protected constructor + * + * @return void + */ + protected function __construct () { + // Call parent constructor + parent::__construct(__CLASS__); + } + + /** + * Creates an instance of this filter class + * + * @return $filterInstance An instance of this filter class + */ + public static final function createCrawlerTaskHandlerInitializerFilter () { + // Get a new instance + $filterInstance = new CrawlerTaskHandlerInitializerFilter(); + + // Return the instance + return $filterInstance; + } + + /** + * Executes the filter with given request and response objects + * + * @param $requestInstance An instance of a class with an Requestable interface + * @param $responseInstance An instance of a class with an Responseable interface + * @return void + * @throws FilterChainException If we need to interrupt the filter chain + * @todo 5% done + */ + public function execute (Requestable $requestInstance, Responseable $responseInstance) { + // Get crawler instance + //$crawlerInstance = Registry::getRegistry()->getInstance('crawler'); + + // Get a new task handler instance + $handlerInstance = ObjectFactory::createObjectByConfiguredName('task_handler_class'); + + /* + * Register all tasks: + * + * 1) ... + */ + //$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_???_task_class'); + //$handlerInstance->registerTask('crawler_???', $taskInstance); + + // Put the task handler in registry + Registry::getRegistry()->addInstance('task_handler', $handlerInstance); + } +} + +// [EOF] +?> diff --git a/docs/README b/docs/README index c63447087..33a874b5e 100644 --- a/docs/README +++ b/docs/README @@ -84,7 +84,16 @@ environment?) to find hash blocks: php index.php app=hub command=miner -------------------------------------------------------------------------------- -6.) How to start the chat console and use it +6.) 
How to start the crawler +-------------------------------------------------------------------------------- + +The distributed crawler is a test application (or maybe later used in a serious +environment?) to crawl websites and build a searchable and distributed index: + +php index.php app=hub command=crawler + +-------------------------------------------------------------------------------- +7.) How to start the chat console and use it -------------------------------------------------------------------------------- The chat console is yet another test application for the node. You can launch @@ -101,7 +110,7 @@ do not try any 'interactive console' things like cursor key or so. This is (or will be) a very simple and basic telnet console. -------------------------------------------------------------------------------- -7.) How to start the apt-proxy and use it +8.) How to start the apt-proxy and use it -------------------------------------------------------------------------------- The apt-proxy is yet another test application for the node and shall provide a -- 2.39.5