*/
class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
/**
- * Cached path of CSV files
+ * "Cached" CSV path
*/
- private $csvFilesPath = '';
+ private $csvFilePath = '';
/**
- * Last found CSV file
+ * Last CSV file instance
*/
- private $lastCsvFile = '';
+ private $lastCsvFileInstance = NULL;
+
+ /**
+ * Stack for pushing data from this class to another
+ */
+ private $stackSourceInstance = NULL;
+
+ /**
+ * Stack name for a CSV file
+ */
+ const STACK_NAME_CSV_FILE = 'csv_file';
+
+ /**
+ * "Imported" CSV files
+ */
+ private $csvFileImported = array();
/**
* Protected constructor
// Call parent constructor
parent::__construct(__CLASS__);
- // Set CSV files path
- $this->csvFilesPath = $this->getConfigInstance()->getConfigEntry('base_path') . '/' . $this->getConfigInstance()->getConfigEntry('crawler_csv_file_path');
+ // "Cache" CSV path for faster usage
+ $this->csvFilePath = $this->getConfigInstance()->getConfigEntry('base_path') . '/' . $this->getConfigInstance()->getConfigEntry('crawler_csv_file_path');
- // Get directory instance
- $directoryInstance = ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilesPath));
+ // Initialize directory instance
+ $directoryInstance = ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilePath));
// Set it here
$this->setDirectoryInstance($directoryInstance);
+
+ // Init stack instance
+ $this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_url_source_stack_class');
+
+ // Init stack
+ $this->getStackSourceInstance()->initStack(self::STACK_NAME_CSV_FILE);
}
/**
* @return $isFound Whether a CSV file is found
*/
private function isCsvFileFound () {
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
// Is it valid?
if (!$this->getDirectoryInstance()->getDirectoryIteratorInstance()->valid()) {
// Rewind to start
} // END - if
// Read next entry
- $directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array('.htaccess', '.', '..'));
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: this->csvFileImported=' . print_r($this->csvFileImported, TRUE));
+ $directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array_merge(array('.htaccess', '.', '..'), $this->csvFileImported));
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
// Is it empty or wrong file extension?
if ((empty($directoryEntry)) || (substr($directoryEntry, -4, 4) != '.csv')) {
// Skip further processing
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
return FALSE;
} // END - if
- // Initialize CSV instance
- $csvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilesPath . '/' . $directoryEntry));
+ // Initialize CSV file instance
+ $this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilePath . '/' . $directoryEntry));
- // Set it here
- $this->setCsvFileInstance($csvFileInstance);
-
- // Found a file
+ // Found an entry
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
return TRUE;
}
// Init source
$sourceInstance->initSource('crawler', 'uploaded_list');
- // Get a ??? @TODO
-
// Return the prepared instance
return $sourceInstance;
}
+ /**
+ * Hands the most recently found CSV file over for import.
+ *
+ * The instance held in lastCsvFileInstance is pushed onto the stack named
+ * STACK_NAME_CSV_FILE of the stack source instance, the base name of its
+ * file is appended to csvFileImported (isCsvFileFound() merges that list
+ * into the entries it passes to readDirectoryExcept()) and finally
+ * lastCsvFileInstance is reset to NULL.
+ *
+ * @return void
+ * @throws NullPointerException If lastCsvFileInstance is not set
+ */
+ private function importCsvFile () {
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
+ // Is the instance set? (it is assigned in isCsvFileFound() when a CSV file was found)
+ if (is_null($this->lastCsvFileInstance)) {
+ // This should not happen, there is no CSV file instance to import
+ throw new NullPointerException($this, self::EXCEPTION_IS_NULL_POINTER);
+ } // END - if
+
+ // Stack this file instance under the CSV file stack name
+ $this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_FILE, $this->lastCsvFileInstance);
+
+ // ... and mark it as "imported" (only the base name of the file is remembered)
+ array_push($this->csvFileImported, basename($this->lastCsvFileInstance->getFileName()));
+
+ // ... and finally NULL it (to save some RAM), the stack still holds the instance
+ $this->lastCsvFileInstance = NULL;
+
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
+
+ /**
+ * Getter for the stackSourceInstance variable, the stack this class uses
+ * for pushing data to another class. The instance is created in the
+ * constructor through the object factory (configured name
+ * 'crawler_uploaded_list_url_source_stack_class').
+ *
+ * @return $stackSourceInstance An instance of an additional stack
+ */
+ public final function getStackSourceInstance () {
+ return $this->stackSourceInstance;
+ }
+
/**
* Processes entries in the stack.
*