]> git.mxchange.org Git - hub.git/blobdiff - application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php
Updated 'core'.
[hub.git] / application / hub / main / source / urls / class_CrawlerUploadedListUrlSource.php
index c98aa6ff039e347b3a808259f5b93a6ff702f77f..7063492b0bd2c96f037c6cbaa84c3e00fa1d23fa 100644 (file)
  */
 class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
        /**
-        * Cached path of CSV files
+        * "Cached" CSV path
         */
-       private $csvFilesPath = '';
+       private $csvFilePath = '';
 
        /**
-        * Last found CSV file
+        * Last CSV file instance
         */
-       private $lastCsvFile = '';
+       private $lastCsvFileInstance = NULL;
+
+       /**
+        * Stack for pushing data from this class to another
+        */
+       private $stackSourceInstance = NULL;
+
+       /**
+        * Stack name for a CSV file
+        */
+       const STACK_NAME_CSV_FILE = 'csv_file';
+
+       /**
+        * Base names of already "imported" CSV files
+        */
+       private $csvFileImported = array();
 
        /**
         * Protected constructor
@@ -41,14 +56,20 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                // Call parent constructor
                parent::__construct(__CLASS__);
 
-               // Set CSV files path
-               $this->csvFilesPath = $this->getConfigInstance()->getConfigEntry('base_path') . '/' . $this->getConfigInstance()->getConfigEntry('crawler_csv_file_path');
+               // "Cache" CSV path for faster usage
+               $this->csvFilePath = $this->getConfigInstance()->getConfigEntry('base_path') . '/' . $this->getConfigInstance()->getConfigEntry('crawler_csv_file_path');
 
-               // Get directory instance
-               $directoryInstance = ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilesPath));
+               // Initialize directory instance
+               $directoryInstance = ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilePath));
 
                // Set it here
                $this->setDirectoryInstance($directoryInstance);
+
+               // Init stack instance
+               $this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_url_source_stack_class');
+
+               // Init stack
+               $this->getStackSourceInstance()->initStack(self::STACK_NAME_CSV_FILE);
        }
 
        /**
@@ -57,6 +78,8 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
         * @return      $isFound        Whether a CSV file is found
         */
        private function isCsvFileFound () {
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
                // Is it valid?
                if (!$this->getDirectoryInstance()->getDirectoryIteratorInstance()->valid()) {
                        // Rewind to start
@@ -64,25 +87,24 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                } // END - if
 
                // Read next entry
-               $directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array('.htaccess', '.', '..'));
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: this->csvFileImported=' . print_r($this->csvFileImported, TRUE));
+               $directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array_merge(array('.htaccess', '.', '..'), $this->csvFileImported));
 
                // Debug message
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
 
                // Is it empty or wrong file extension?
                if ((empty($directoryEntry)) || (substr($directoryEntry, -4, 4) != '.csv')) {
                        // Skip further processing
-                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
+                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
                        return FALSE;
                } // END - if
 
-               // Initialize CSV instance
-               $csvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilesPath . '/' . $directoryEntry));
+               // Initialize CSV file instance
+               $this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilePath . '/' . $directoryEntry));
 
-               // Set it here
-               $this->setCsvFileInstance($csvFileInstance);
-
-               // Found a file
+               // Found an entry
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
                return TRUE;
        }
 
@@ -98,12 +120,46 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                // Init source
                $sourceInstance->initSource('crawler', 'uploaded_list');
 
-               // Get a ??? @TODO
-
                // Return the prepared instance
                return $sourceInstance;
        }
 
+       /**
+        * Pushes the last found CSV file instance on the CSV stack and marks the file as "imported"
+        *
+        * @return      void
+        * @throws      NullPointerException    If no CSV file instance is set (lastCsvFileInstance is NULL)
+        */
+       private function importCsvFile () {
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
+               // Is a CSV file instance set?
+               if (is_null($this->lastCsvFileInstance)) {
+                       // Nothing to import, this should not happen
+                       throw new NullPointerException($this, self::EXCEPTION_IS_NULL_POINTER);
+               } // END - if
+
+               // Push the CSV file instance on the named stack
+               $this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_FILE, $this->lastCsvFileInstance);
+
+               // ... and remember its base name as "imported" (isCsvFileFound() excludes these names)
+               array_push($this->csvFileImported, basename($this->lastCsvFileInstance->getFileName()));
+
+               // ... and finally NULL the local reference (to save some RAM)
+               $this->lastCsvFileInstance = NULL;
+
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+       }
+
+       /**
+        * Getter for the stack instance this source pushes CSV file instances on
+        *
+        * @return      $stackSourceInstance    The stack instance held by this source
+        */
+       public final function getStackSourceInstance () {
+               return $this->stackSourceInstance;
+       }
+
        /**
         * Processes entries in the stack.
         *