// CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-MAX-RUNS
$cfg->setConfigEntry('task_crawler_remote_job_publisher_max_runs', 0);
+// CFG: CRAWLER-MIME-SNIFFER-TASK-CLASS
+$cfg->setConfigEntry('crawler_mime_sniffer_task_class', 'CrawlerMimeSnifferTask'); // class created for the 'crawler_mime_sniffer' task
+
+// CFG: TASK-CRAWLER-MIME-SNIFFER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_mime_sniffer_startup_delay', 1500); // NOTE(review): unit is not visible here (presumably milliseconds) - confirm; the other tasks below use the same value
+
+// CFG: TASK-CRAWLER-MIME-SNIFFER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_mime_sniffer_interval_delay', 100); // NOTE(review): unit is not visible here (presumably milliseconds) - confirm; the other tasks below use the same value
+
+// CFG: TASK-CRAWLER-MIME-SNIFFER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_mime_sniffer_max_runs', 0); // NOTE(review): 0 presumably means "no run limit" - confirm; the other tasks below use the same value
+
+// CFG: CRAWLER-DOCUMENT-PARSER-TASK-CLASS
+$cfg->setConfigEntry('crawler_document_parser_task_class', 'CrawlerDocumentParserTask'); // class created for the 'crawler_document_parser' task
+
+// CFG: TASK-CRAWLER-DOCUMENT-PARSER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_document_parser_startup_delay', 1500);
+
+// CFG: TASK-CRAWLER-DOCUMENT-PARSER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_document_parser_interval_delay', 100);
+
+// CFG: TASK-CRAWLER-DOCUMENT-PARSER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_document_parser_max_runs', 0);
+
+// CFG: CRAWLER-STRUCTURE-ANALYZER-TASK-CLASS
+$cfg->setConfigEntry('crawler_structure_analyzer_task_class', 'CrawlerStructureAnalyzerTask'); // class created for the 'crawler_structure_analyzer' task
+
+// CFG: TASK-CRAWLER-STRUCTURE-ANALYZER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_structure_analyzer_startup_delay', 1500);
+
+// CFG: TASK-CRAWLER-STRUCTURE-ANALYZER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_structure_analyzer_interval_delay', 100);
+
+// CFG: TASK-CRAWLER-STRUCTURE-ANALYZER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_structure_analyzer_max_runs', 0);
+
+// CFG: CRAWLER-SNIPPET-EXTRACTOR-TASK-CLASS
+$cfg->setConfigEntry('crawler_snippet_extractor_task_class', 'CrawlerSnippetExtractorTask'); // class created for the 'crawler_snippet_extractor' task
+
+// CFG: TASK-CRAWLER-SNIPPET-EXTRACTOR-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_snippet_extractor_startup_delay', 1500);
+
+// CFG: TASK-CRAWLER-SNIPPET-EXTRACTOR-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_snippet_extractor_interval_delay', 100);
+
+// CFG: TASK-CRAWLER-SNIPPET-EXTRACTOR-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_snippet_extractor_max_runs', 0);
+
+// CFG: CRAWLER-PING-TASK-CLASS
+$cfg->setConfigEntry('crawler_ping_task_class', 'CrawlerPingTask'); // class created for the 'crawler_ping' task
+
+// CFG: TASK-CRAWLER-PING-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_ping_startup_delay', 1500);
+
+// CFG: TASK-CRAWLER-PING-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_ping_interval_delay', 100);
+
+// CFG: TASK-CRAWLER-PING-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_ping_max_runs', 0);
+
///////////////////////////////////////////////////////////////////////////////
// HTTP Configuration
///////////////////////////////////////////////////////////////////////////////
$taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
$handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance);
+ // 5) MIME sniffer
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_mime_sniffer_task_class');
+ $handlerInstance->registerTask('crawler_mime_sniffer', $taskInstance);
+
+ // 6) Document parser (converts document to meta format)
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_document_parser_task_class');
+ $handlerInstance->registerTask('crawler_document_parser', $taskInstance);
+
+ // 7) Document structure analyzer
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_structure_analyzer_task_class');
+ $handlerInstance->registerTask('crawler_structure_analyzer', $taskInstance);
+
+ // 8) Snippet extractor
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_snippet_extractor_task_class');
+ $handlerInstance->registerTask('crawler_snippet_extractor', $taskInstance);
+
+ // 9) Node ping
+ $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
+ $handlerInstance->registerTask('crawler_ping', $taskInstance);
+
// Put the task handler in registry
Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
}
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A DocumentParser task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerDocumentParserTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Constructor. It is protected and therefore not callable from outside
+	 * the class hierarchy; use createCrawlerDocumentParserTask() instead.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand the name of this class over to the BaseTask constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method: builds a new task object.
+	 *
+	 * @return	CrawlerDocumentParserTask	A freshly constructed task instance
+	 */
+	public final static function createCrawlerDocumentParserTask () {
+		// Nothing else is prepared here, so the new object is returned directly
+		return new self();
+	}
+
+	/**
+	 * Lets the given visitor visit this task by calling its visitTask()
+	 * method with this object.
+	 *
+	 * @param	Visitor	$visitorInstance	The visitor this task is handed to
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Pass this task to the visitor
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task. Currently only a placeholder: it passes the
+	 * message 'Unimplemented task.' to partialStub().
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		// Not implemented yet
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A MimeSniffer task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerMimeSnifferTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Constructor. It is protected and therefore not callable from outside
+	 * the class hierarchy; use createCrawlerMimeSnifferTask() instead.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand the name of this class over to the BaseTask constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method: builds a new task object.
+	 *
+	 * @return	CrawlerMimeSnifferTask	A freshly constructed task instance
+	 */
+	public final static function createCrawlerMimeSnifferTask () {
+		// Nothing else is prepared here, so the new object is returned directly
+		return new self();
+	}
+
+	/**
+	 * Lets the given visitor visit this task by calling its visitTask()
+	 * method with this object.
+	 *
+	 * @param	Visitor	$visitorInstance	The visitor this task is handed to
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Pass this task to the visitor
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task. Currently only a placeholder: it passes the
+	 * message 'Unimplemented task.' to partialStub().
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		// Not implemented yet
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A Ping task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerPingTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Constructor. It is protected and therefore not callable from outside
+	 * the class hierarchy; use createCrawlerPingTask() instead.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand the name of this class over to the BaseTask constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method: builds a new task object.
+	 *
+	 * @return	CrawlerPingTask	A freshly constructed task instance
+	 */
+	public final static function createCrawlerPingTask () {
+		// Nothing else is prepared here, so the new object is returned directly
+		return new self();
+	}
+
+	/**
+	 * Lets the given visitor visit this task by calling its visitTask()
+	 * method with this object.
+	 *
+	 * @param	Visitor	$visitorInstance	The visitor this task is handed to
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Pass this task to the visitor
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task. Currently only a placeholder: it passes the
+	 * message 'Unimplemented task.' to partialStub().
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		// Not implemented yet
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A SnippetExtractor task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerSnippetExtractorTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Constructor. It is protected and therefore not callable from outside
+	 * the class hierarchy; use createCrawlerSnippetExtractorTask() instead.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand the name of this class over to the BaseTask constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method: builds a new task object.
+	 *
+	 * @return	CrawlerSnippetExtractorTask	A freshly constructed task instance
+	 */
+	public final static function createCrawlerSnippetExtractorTask () {
+		// Nothing else is prepared here, so the new object is returned directly
+		return new self();
+	}
+
+	/**
+	 * Lets the given visitor visit this task by calling its visitTask()
+	 * method with this object.
+	 *
+	 * @param	Visitor	$visitorInstance	The visitor this task is handed to
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Pass this task to the visitor
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task. Currently only a placeholder: it passes the
+	 * message 'Unimplemented task.' to partialStub().
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		// Not implemented yet
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>
--- /dev/null
+Deny from all
--- /dev/null
+<?php
+/**
+ * A StructureAnalyzer task for crawlers
+ *
+ * @author Roland Haeder <webmaster@ship-simu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2014 Crawler Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerStructureAnalyzerTask extends BaseTask implements Taskable, Visitable {
+	/**
+	 * Constructor. It is protected and therefore not callable from outside
+	 * the class hierarchy; use createCrawlerStructureAnalyzerTask() instead.
+	 *
+	 * @return	void
+	 */
+	protected function __construct () {
+		// Hand the name of this class over to the BaseTask constructor
+		parent::__construct(__CLASS__);
+	}
+
+	/**
+	 * Factory method: builds a new task object.
+	 *
+	 * @return	CrawlerStructureAnalyzerTask	A freshly constructed task instance
+	 */
+	public final static function createCrawlerStructureAnalyzerTask () {
+		// Nothing else is prepared here, so the new object is returned directly
+		return new self();
+	}
+
+	/**
+	 * Lets the given visitor visit this task by calling its visitTask()
+	 * method with this object.
+	 *
+	 * @param	Visitor	$visitorInstance	The visitor this task is handed to
+	 * @return	void
+	 * @todo	Maybe visit some sub-objects
+	 */
+	public function accept (Visitor $visitorInstance) {
+		// Pass this task to the visitor
+		$visitorInstance->visitTask($this);
+	}
+
+	/**
+	 * Executes the task. Currently only a placeholder: it passes the
+	 * message 'Unimplemented task.' to partialStub().
+	 *
+	 * @return	void
+	 * @todo	0%
+	 */
+	public function executeTask () {
+		// Not implemented yet
+		$this->partialStub('Unimplemented task.');
+	}
+}
+
+// [EOF]
+?>