git.mxchange.org Git - hub.git/commitdiff
Added more tasks (see wiki: https://wiki.shipsimu.org/doku.php/de/projects/hub/applic...
author Roland Haeder <roland@mxchange.org>
Tue, 6 May 2014 19:26:17 +0000 (21:26 +0200)
committer Roland Haeder <roland@mxchange.org>
Tue, 6 May 2014 19:26:17 +0000 (21:26 +0200)
Signed-off-by: Roland Haeder <roland@mxchange.org>
12 files changed:
application/hub/config.php
application/hub/main/filter/task/crawler/class_CrawlerTaskHandlerInitializerFilter.php
application/hub/main/tasks/crawler/document_parser/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/document_parser/class_CrawlerDocumentParserTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/mime_sniffer/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/mime_sniffer/class_CrawlerMimeSnifferTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/ping/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/ping/class_CrawlerPingTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/snippet_extractor/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php [new file with mode: 0644]
application/hub/main/tasks/crawler/structure_analyzer/.htaccess [new file with mode: 0644]
application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php [new file with mode: 0644]

index da3c260537c4f6bdba4f6a2b1fb6fa8b2dc0cd7f..2aa1dba3aee08ef96f032e1cd6034aa05e4d9b63 100644 (file)
@@ -1265,6 +1265,66 @@ $cfg->setConfigEntry('task_crawler_remote_job_publisher_interval_delay', 50);
 // CFG: TASK-CRAWLER-REMOTE-JOB-PUBLISHER-MAX-RUNS
 $cfg->setConfigEntry('task_crawler_remote_job_publisher_max_runs', 0);
 
+// CFG: CRAWLER-MIME-SNIFFER-TASK-CLASS
+$cfg->setConfigEntry('crawler_mime_sniffer_task_class', 'CrawlerMimeSnifferTask');
+
+// CFG: TASK-CRAWLER-MIME-SNIFFER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_mime_sniffer_startup_delay', 1500);
+
+// CFG: TASK-CRAWLER-MIME-SNIFFER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_mime_sniffer_interval_delay', 100);
+
+// CFG: TASK-CRAWLER-MIME-SNIFFER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_mime_sniffer_max_runs', 0);
+
+// CFG: CRAWLER-DOCUMENT-PARSER-TASK-CLASS
+$cfg->setConfigEntry('crawler_document_parser_task_class', 'CrawlerDocumentParserTask');
+
+// CFG: TASK-CRAWLER-DOCUMENT-PARSER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_document_parser_startup_delay', 1500);
+
+// CFG: TASK-CRAWLER-DOCUMENT-PARSER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_document_parser_interval_delay', 100);
+
+// CFG: TASK-CRAWLER-DOCUMENT-PARSER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_document_parser_max_runs', 0);
+
+// CFG: CRAWLER-STRUCTURE-ANALYZER-TASK-CLASS
+$cfg->setConfigEntry('crawler_structure_analyzer_task_class', 'CrawlerStructureAnalyzerTask');
+
+// CFG: TASK-CRAWLER-STRUCTURE-ANALYZER-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_structure_analyzer_startup_delay', 1500);
+
+// CFG: TASK-CRAWLER-STRUCTURE-ANALYZER-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_structure_analyzer_interval_delay', 100);
+
+// CFG: TASK-CRAWLER-STRUCTURE-ANALYZER-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_structure_analyzer_max_runs', 0);
+
+// CFG: CRAWLER-SNIPPET-EXTRACTOR-TASK-CLASS
+$cfg->setConfigEntry('crawler_snippet_extractor_task_class', 'CrawlerSnippetExtractorTask');
+
+// CFG: TASK-CRAWLER-SNIPPET-EXTRACTOR-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_snippet_extractor_startup_delay', 1500);
+
+// CFG: TASK-CRAWLER-SNIPPET-EXTRACTOR-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_snippet_extractor_interval_delay', 100);
+
+// CFG: TASK-CRAWLER-SNIPPET-EXTRACTOR-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_snippet_extractor_max_runs', 0);
+
+// CFG: CRAWLER-PING-TASK-CLASS
+$cfg->setConfigEntry('crawler_ping_task_class', 'CrawlerPingTask');
+
+// CFG: TASK-CRAWLER-PING-STARTUP-DELAY
+$cfg->setConfigEntry('task_crawler_ping_startup_delay', 1500);
+
+// CFG: TASK-CRAWLER-PING-INTERVAL-DELAY
+$cfg->setConfigEntry('task_crawler_ping_interval_delay', 100);
+
+// CFG: TASK-CRAWLER-PING-MAX-RUNS
+$cfg->setConfigEntry('task_crawler_ping_max_runs', 0);
+
 ///////////////////////////////////////////////////////////////////////////////
 //                            HTTP Configuration
 ///////////////////////////////////////////////////////////////////////////////
index 43a7be491f7b6c994a411a0216c65e3f296d5cfd..cb2a1383c6e7b86cfea2e35ddf8d132a571fe1a7 100644 (file)
@@ -81,6 +81,26 @@ class CrawlerTaskHandlerInitializerFilter extends BaseCrawlerFilter implements F
                $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_remote_job_publisher_task_class');
                $handlerInstance->registerTask('crawler_remote_job_publisher', $taskInstance);
 
+               // 5) MIME sniffer
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_mime_sniffer_task_class');
+               $handlerInstance->registerTask('crawler_mime_sniffer', $taskInstance);
+
+               // 6) Document parser (converts document to meta format)
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_document_parser_task_class');
+               $handlerInstance->registerTask('crawler_document_parser', $taskInstance);
+
+               // 7) Document structure analyzer
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_structure_analyzer_task_class');
+               $handlerInstance->registerTask('crawler_structure_analyzer', $taskInstance);
+
+               // 8) Snippet extractor
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_snippet_extractor_task_class');
+               $handlerInstance->registerTask('crawler_snippet_extractor', $taskInstance);
+
+               // 9) Node ping
+               $taskInstance = ObjectFactory::createObjectByConfiguredName('crawler_ping_task_class');
+               $handlerInstance->registerTask('crawler_ping', $taskInstance);
+
                // Put the task handler in registry
                Registry::getRegistry()->addInstance('task_handler', $handlerInstance);
        }
diff --git a/application/hub/main/tasks/crawler/document_parser/.htaccess b/application/hub/main/tasks/crawler/document_parser/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/document_parser/class_CrawlerDocumentParserTask.php b/application/hub/main/tasks/crawler/document_parser/class_CrawlerDocumentParserTask.php
new file mode 100644 (file)
index 0000000..32b245d
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A DocumentParser task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerDocumentParserTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor; use createCrawlerDocumentParserTask() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Pass this class' name (__CLASS__) on to the parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates a new instance of this class
+        *
+        * @return      $taskInstance   A new CrawlerDocumentParserTask instance
+        */
+       public final static function createCrawlerDocumentParserTask () {
+               // Instantiate here, as the constructor is protected
+               $taskInstance = new CrawlerDocumentParserTask();
+
+               // Hand the new instance back to the caller
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Hand this task over to the visitor's visitTask() method
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task; for now it only calls partialStub() as nothing is implemented
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/mime_sniffer/.htaccess b/application/hub/main/tasks/crawler/mime_sniffer/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/mime_sniffer/class_CrawlerMimeSnifferTask.php b/application/hub/main/tasks/crawler/mime_sniffer/class_CrawlerMimeSnifferTask.php
new file mode 100644 (file)
index 0000000..6cd3001
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A MimeSniffer task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerMimeSnifferTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor; use createCrawlerMimeSnifferTask() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Pass this class' name (__CLASS__) on to the parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates a new instance of this class
+        *
+        * @return      $taskInstance   A new CrawlerMimeSnifferTask instance
+        */
+       public final static function createCrawlerMimeSnifferTask () {
+               // Instantiate here, as the constructor is protected
+               $taskInstance = new CrawlerMimeSnifferTask();
+
+               // Hand the new instance back to the caller
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Hand this task over to the visitor's visitTask() method
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task; for now it only calls partialStub() as nothing is implemented
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/ping/.htaccess b/application/hub/main/tasks/crawler/ping/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/ping/class_CrawlerPingTask.php b/application/hub/main/tasks/crawler/ping/class_CrawlerPingTask.php
new file mode 100644 (file)
index 0000000..ee2154c
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A Ping task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerPingTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor; use createCrawlerPingTask() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Pass this class' name (__CLASS__) on to the parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates a new instance of this class
+        *
+        * @return      $taskInstance   A new CrawlerPingTask instance
+        */
+       public final static function createCrawlerPingTask () {
+               // Instantiate here, as the constructor is protected
+               $taskInstance = new CrawlerPingTask();
+
+               // Hand the new instance back to the caller
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Hand this task over to the visitor's visitTask() method
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task; for now it only calls partialStub() as nothing is implemented
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/snippet_extractor/.htaccess b/application/hub/main/tasks/crawler/snippet_extractor/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php b/application/hub/main/tasks/crawler/snippet_extractor/class_CrawlerSnippetExtractorTask.php
new file mode 100644 (file)
index 0000000..8b2f98a
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A SnippetExtractor task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerSnippetExtractorTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor; use createCrawlerSnippetExtractorTask() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Pass this class' name (__CLASS__) on to the parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates a new instance of this class
+        *
+        * @return      $taskInstance   A new CrawlerSnippetExtractorTask instance
+        */
+       public final static function createCrawlerSnippetExtractorTask () {
+               // Instantiate here, as the constructor is protected
+               $taskInstance = new CrawlerSnippetExtractorTask();
+
+               // Hand the new instance back to the caller
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Hand this task over to the visitor's visitTask() method
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task; for now it only calls partialStub() as nothing is implemented
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>
diff --git a/application/hub/main/tasks/crawler/structure_analyzer/.htaccess b/application/hub/main/tasks/crawler/structure_analyzer/.htaccess
new file mode 100644 (file)
index 0000000..3a42882
--- /dev/null
@@ -0,0 +1 @@
+Deny from all
diff --git a/application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php b/application/hub/main/tasks/crawler/structure_analyzer/class_CrawlerStructureAnalyzerTask.php
new file mode 100644 (file)
index 0000000..20e153c
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * A StructureAnalyzer task for crawlers
+ *
+ * @author             Roland Haeder <webmaster@ship-simu.org>
+ * @version            0.0.0
+ * @copyright  Copyright (c) 2014 Crawler Developer Team
+ * @license            GNU GPL 3.0 or any newer version
+ * @link               http://www.ship-simu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+class CrawlerStructureAnalyzerTask extends BaseTask implements Taskable, Visitable {
+       /**
+        * Protected constructor; use createCrawlerStructureAnalyzerTask() to get an instance
+        *
+        * @return      void
+        */
+       protected function __construct () {
+               // Pass this class' name (__CLASS__) on to the parent constructor
+               parent::__construct(__CLASS__);
+       }
+
+       /**
+        * Factory method: creates a new instance of this class
+        *
+        * @return      $taskInstance   A new CrawlerStructureAnalyzerTask instance
+        */
+       public final static function createCrawlerStructureAnalyzerTask () {
+               // Instantiate here, as the constructor is protected
+               $taskInstance = new CrawlerStructureAnalyzerTask();
+
+               // Hand the new instance back to the caller
+               return $taskInstance;
+       }
+
+       /**
+        * Accepts a visitor and lets it visit this task
+        *
+        * @param       $visitorInstance        An instance of a Visitor class
+        * @return      void
+        * @todo        Maybe visit some sub-objects
+        */
+       public function accept (Visitor $visitorInstance) {
+               // Hand this task over to the visitor's visitTask() method
+               $visitorInstance->visitTask($this);
+       }
+
+       /**
+        * Executes the task; for now it only calls partialStub() as nothing is implemented
+        *
+        * @return      void
+        * @todo        0%
+        */
+       public function executeTask () {
+               $this->partialStub('Unimplemented task.');
+       }
+}
+
+// [EOF]
+?>