]> git.mxchange.org Git - hub.git/commitdiff
Continued:
author Roland Haeder <roland@mxchange.org>
Sun, 7 Dec 2014 20:30:48 +0000 (21:30 +0100)
committer Roland Haeder <roland@mxchange.org>
Sun, 7 Dec 2014 20:30:48 +0000 (21:30 +0100)
- added initial stuff to import CSV files (unfinished)
- updated to latest core

Signed-off-by: Roland Haeder <roland@mxchange.org>
application/hub/config.php
application/hub/main/nodes/class_BaseHubNode.php
application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php
application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php
core

index a1ec1a2ecdb592eaa63d37579cde67ad29393516..217135e032a9565784a9bbf68573fd575c11ef78 100644 (file)
@@ -1463,6 +1463,9 @@ $cfg->setConfigEntry('task_crawler_uploaded_list_scanner_interval_delay', 1000);
 // CFG: TASK-CRAWLER-UPLOADED-LIST-SCANNER-MAX-RUNS
 $cfg->setConfigEntry('task_crawler_uploaded_list_scanner_max_runs', 0);
 
+// CFG: CRAWLER-CSV-FILE-PATH
+$cfg->setConfigEntry('crawler_csv_file_path', 'data/url_lists');
+
 ///////////////////////////////////////////////////////////////////////////////
 //                            HTTP Configuration
 ///////////////////////////////////////////////////////////////////////////////
index e71a63026f12c43db3fd7cd585e1643f65fe1648..ce3637cc12f46ad561125fff901ae54f72ea71b3 100644 (file)
@@ -689,6 +689,7 @@ class BaseHubNode extends BaseHubSystem implements Updateable, AddableCriteria {
         * @return      $unlInstance    An instance of a LocateableNode class for this node
         */
        public function determineUniversalNodeLocator () {
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('NODE[' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
                // Determine UNL based on this node:
                // 1) Get discovery class
                $discoveryInstance = ObjectFactory::createObjectByConfiguredName('unl_discovery_class');
@@ -697,6 +698,7 @@ class BaseHubNode extends BaseHubSystem implements Updateable, AddableCriteria {
                $unlInstance = $discoveryInstance->discoverUniversalNodeLocatorByNode($this);
 
                // 3) Return it
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('NODE[' . __METHOD__ . ':' . __LINE__ . ']: unlInstance= ' . $unlInstance->__toString() . ' - EXIT!');
                return $unlInstance;
        }
 
@@ -706,6 +708,8 @@ class BaseHubNode extends BaseHubSystem implements Updateable, AddableCriteria {
         * @return      $unlArray       An array from an instance of a LocateableNode class for this node
         */
        public final function getUniversalNodeLocatorArray () {
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('NODE[' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
                // Get the Universal Node Locator (UNL) instance
                $unlInstance = $this->determineUniversalNodeLocator();
 
@@ -713,6 +717,7 @@ class BaseHubNode extends BaseHubSystem implements Updateable, AddableCriteria {
                die(__METHOD__ . ':unlInstance[' . gettype($unlInstance) . ']=' . print_r($unlInstance, TRUE));
 
                // Return it
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('NODE[' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
                return $unlArray;
        }
 
index 928a9e28449bbe9ac58ffe539d187aface6340e2..c98aa6ff039e347b3a808259f5b93a6ff702f77f 100644 (file)
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
+       /**
+        * Cached path of CSV files
+        */
+       private $csvFilesPath = '';
+
+       /**
+        * Last found CSV file
+        */
+       private $lastCsvFile = '';
+
        /**
         * Protected constructor
         *
@@ -30,6 +40,50 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
        protected function __construct () {
                // Call parent constructor
                parent::__construct(__CLASS__);
+
+               // Set CSV files path
+               $this->csvFilesPath = $this->getConfigInstance()->getConfigEntry('base_path') . '/' . $this->getConfigInstance()->getConfigEntry('crawler_csv_file_path');
+
+               // Get directory instance
+               $directoryInstance = ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilesPath));
+
+               // Set it here
+               $this->setDirectoryInstance($directoryInstance);
+       }
+
+       /**
+        * Checks whether a CSV file is found in configured path
+        *
+        * @return      $isFound        Whether a CSV file is found
+        */
+       private function isCsvFileFound () {
+               // Is it valid?
+               if (!$this->getDirectoryInstance()->getDirectoryIteratorInstance()->valid()) {
+                       // Rewind to start
+                       $this->getDirectoryInstance()->getDirectoryIteratorInstance()->rewind();
+               } // END - if
+
+               // Read next entry
+               $directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array('.htaccess', '.', '..'));
+
+               // Debug message
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
+
+               // Is it empty or wrong file extension?
+               if ((empty($directoryEntry)) || (substr($directoryEntry, -4, 4) != '.csv')) {
+                       // Skip further processing
+                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
+                       return FALSE;
+               } // END - if
+
+               // Initialize CSV instance
+               $csvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilesPath . '/' . $directoryEntry));
+
+               // Set it here
+               $this->setCsvFileInstance($csvFileInstance);
+
+               // Found a file
+               return TRUE;
        }
 
        /**
@@ -64,7 +118,14 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                         * to process more than one entry at a time.
                         */
                        $this->processNextEntry();
-               } // @TODO elseif ($this->
+               } elseif ($this->isCsvFileFound()) {
+                       /*
+                        * A file containing an URL list is found. Please note the format is
+                        * CSV-like as you may wish to provide meta data such as crawl
+                        * depth, handling of 3rd-party URLs and such.
+                        */
+                       $this->importCsvFile();
+               }
 
                $this->partialStub('Please implement this method.');
        }
index 9dafd9f35b80dfbf5b4851c5cdf2a93cd1b863cb..928eb3310072f37ab4ca31be2ed3871cd3c77b31 100644 (file)
@@ -136,6 +136,8 @@ class NodeDistributedHashTableDatabaseWrapper extends BaseDatabaseWrapper implem
         * @return      $dataSetInstance        An instance of a StoreableCriteria class
         */
        private function prepareLocalDataSetInstance () {
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('DHT-WRAPPER[' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
                // Get node/request instances
                $nodeInstance = NodeObjectFactory::createNodeInstance();
                $requestInstance = ApplicationHelper::getSelfInstance()->getRequestInstance();
@@ -169,6 +171,7 @@ class NodeDistributedHashTableDatabaseWrapper extends BaseDatabaseWrapper implem
                $dataSetInstance->addCriteria(self::DB_COLUMN_ACCEPT_BOOTSTRAP, $this->translateBooleanToYesNo($nodeInstance->isAcceptingDhtBootstrap()));
 
                // Return it
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('DHT-WRAPPER[' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
                return $dataSetInstance;
        }
 
diff --git a/core b/core
index 6339d66e421f4514ec9de8f61d96e38cb34005e6..c8ea0af3f3bfe092c38f0864d689a82172af19c0 160000 (submodule)
--- a/core
+++ b/core
@@ -1 +1 @@
-Subproject commit 6339d66e421f4514ec9de8f61d96e38cb34005e6
+Subproject commit c8ea0af3f3bfe092c38f0864d689a82172af19c0