From 5ef98382f47aa1b63151acebb4c2788888a9cb81 Mon Sep 17 00:00:00 2001
From: Roland Haeder <roland@mxchange.org>
Date: Sun, 7 Dec 2014 21:30:48 +0100
Subject: [PATCH] Continued: - added initial stuff to import CSV files
 (unfinished) - updated to latest core

Signed-off-by: Roland Haeder <roland@mxchange.org>
---
 application/hub/config.php                    |  3 +
 .../hub/main/nodes/class_BaseHubNode.php      |  5 ++
 .../class_CrawlerUploadedListUrlSource.php    | 63 ++++++++++++++++++-
 ...odeDistributedHashTableDatabaseWrapper.php |  3 +
 core                                          |  2 +-
 5 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/application/hub/config.php b/application/hub/config.php
index a1ec1a2ec..217135e03 100644
--- a/application/hub/config.php
+++ b/application/hub/config.php
@@ -1463,6 +1463,9 @@ $cfg->setConfigEntry('task_crawler_uploaded_list_scanner_interval_delay', 1000);
 // CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-MAX-RUNS
 $cfg->setConfigEntry('task_crawler_uploaded_list_scanner_max_runs', 0);
 
+// CFG: CRAWLER-CSV-FILE-PATH
+$cfg->setConfigEntry('crawler_csv_file_path', 'data/url_lists');
+
 ///////////////////////////////////////////////////////////////////////////////
 //                            HTTP Configuration
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/application/hub/main/nodes/class_BaseHubNode.php b/application/hub/main/nodes/class_BaseHubNode.php
index e71a63026..ce3637cc1 100644
--- a/application/hub/main/nodes/class_BaseHubNode.php
+++ b/application/hub/main/nodes/class_BaseHubNode.php
@@ -689,6 +689,7 @@ class BaseHubNode extends BaseHubSystem implements Updateable, AddableCriteria {
 	 * @return	$unlInstance	An instance of a LocateableNode class for this node
 	 */
 	public function determineUniversalNodeLocator () {
+		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('NODE[' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
 		// Determine UNL based on this node:
 		// 1) Get discovery class
 		$discoveryInstance = ObjectFactory::createObjectByConfiguredName('unl_discovery_class');
@@ -697,6 +698,7 @@ class BaseHubNode extends BaseHubSystem implements Updateable, AddableCriteria {
 		$unlInstance = $discoveryInstance->discoverUniversalNodeLocatorByNode($this);
 
 		// 3) Return it
+		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('NODE[' . __METHOD__ . ':' . __LINE__ . ']: unlInstance= ' . $unlInstance->__toString() . ' - EXIT!');
 		return $unlInstance;
 	}
 
@@ -706,6 +708,8 @@ class BaseHubNode extends BaseHubSystem implements Updateable, AddableCriteria {
 	 * @return	$unlArray	An array from an instance of a LocateableNode class for this node
 	 */
 	public final function getUniversalNodeLocatorArray () {
+		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('NODE[' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
 		// Get the Universal Node Locator (UNL) instance
 		$unlInstance = $this->determineUniversalNodeLocator();
 
@@ -713,6 +717,7 @@ class BaseHubNode extends BaseHubSystem implements Updateable, AddableCriteria {
 		die(__METHOD__ . ':unlInstance[' . gettype($unlInstance) . ']=' . print_r($unlInstance, TRUE));
 
 		// Return it
+		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('NODE[' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
 		return $unlArray;
 	}
 
diff --git a/application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php b/application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php
index 928a9e284..c98aa6ff0 100644
--- a/application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php
+++ b/application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php
@@ -22,6 +22,16 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
+	/**
+	 * Cached path of CSV files
+	 */
+	private $csvFilesPath = '';
+
+	/**
+	 * Last found CSV file
+	 */
+	private $lastCsvFile = '';
+
 	/**
 	 * Protected constructor
 	 *
@@ -30,6 +40,50 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
 	protected function __construct () {
 		// Call parent constructor
 		parent::__construct(__CLASS__);
+
+		// Build CSV files path from config entries 'base_path' and 'crawler_csv_file_path'
+		$this->csvFilesPath = $this->getConfigInstance()->getConfigEntry('base_path') . '/' . $this->getConfigInstance()->getConfigEntry('crawler_csv_file_path');
+
+		// Create a directory instance (configured 'directory_class') for that path
+		$directoryInstance = ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilesPath));
+
+		// Keep it in this object, isCsvFileFound() reads directory entries from it
+		$this->setDirectoryInstance($directoryInstance);
+	}
+
+	/**
+	 * Reads one directory entry from the CSV path; if it ends with '.csv', a CSV file instance is created and set here
+	 *
+	 * @return	$isFound	TRUE if the entry read ends with '.csv', FALSE if it is empty or has another extension
+	 */
+	private function isCsvFileFound () {
+		// Is the directory iterator still at a valid position?
+		if (!$this->getDirectoryInstance()->getDirectoryIteratorInstance()->valid()) {
+			// No, so rewind it to start over
+			$this->getDirectoryInstance()->getDirectoryIteratorInstance()->rewind();
+		} // END - if
+
+		// Read next entry, '.htaccess', '.' and '..' are passed as exceptions
+		$directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array('.htaccess', '.', '..'));
+
+		// Debug message with length and name of the entry read
+		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
+
+		// Is it empty or do the last 4 characters differ from '.csv' (case-sensitive)?
+		if ((empty($directoryEntry)) || (substr($directoryEntry, -4, 4) != '.csv')) {
+			// Not a CSV file, no CSV instance is created for this entry
+			/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
+			return FALSE;
+		} // END - if
+
+		// Create a CSV file instance (configured 'csv_file_class') for path + '/' + entry
+		$csvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilesPath . '/' . $directoryEntry));
+
+		// Keep it in this object via its setter
+		$this->setCsvFileInstance($csvFileInstance);
+
+		// A CSV file was found and its instance has been set
+		return TRUE;
 	}
 
 	/**
@@ -64,7 +118,14 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
 			 * to process more than one entry at a time.
 			 */
 			$this->processNextEntry();
-		} // @TODO elseif ($this->
+		} elseif ($this->isCsvFileFound()) {
+			/*
+			 * A file containing an URL list is found. Please note the format is
+			 * CSV-like as you may wish to provide meta data such as crawl
+			 * depth, handling of 3rd-party URLs and such.
+			 */
+			$this->importCsvFile();
+		}
 
 		$this->partialStub('Please implement this method.');
 	}
diff --git a/application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php b/application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php
index 9dafd9f35..928eb3310 100644
--- a/application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php
+++ b/application/hub/main/wrapper/node/class_NodeDistributedHashTableDatabaseWrapper.php
@@ -136,6 +136,8 @@ class NodeDistributedHashTableDatabaseWrapper extends BaseDatabaseWrapper implem
 	 * @return	$dataSetInstance	An instance of a StoreableCriteria class
 	 */
 	private function prepareLocalDataSetInstance () {
+		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('DHT-WRAPPER[' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
 		// Get node/request instances
 		$nodeInstance = NodeObjectFactory::createNodeInstance();
 		$requestInstance = ApplicationHelper::getSelfInstance()->getRequestInstance();
@@ -169,6 +171,7 @@ class NodeDistributedHashTableDatabaseWrapper extends BaseDatabaseWrapper implem
 		$dataSetInstance->addCriteria(self::DB_COLUMN_ACCEPT_BOOTSTRAP, $this->translateBooleanToYesNo($nodeInstance->isAcceptingDhtBootstrap()));
 
 		// Return it
+		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('DHT-WRAPPER[' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
 		return $dataSetInstance;
 	}
 
diff --git a/core b/core
index 6339d66e4..c8ea0af3f 160000
--- a/core
+++ b/core
@@ -1 +1 @@
-Subproject commit 6339d66e421f4514ec9de8f61d96e38cb34005e6
+Subproject commit c8ea0af3f3bfe092c38f0864d689a82172af19c0
-- 
2.39.5