InputOutputPointer is the right interface.
[core.git] / inc / classes / main / stacker / file / class_BaseFileStack.php
index f5050899ae5bca521b466e209daf0cb99fe97366..53afecbd50b5aede20952acbc1327a242ad3f141 100644 (file)
@@ -47,46 +47,6 @@ class BaseFileStack extends BaseStacker {
         */
        const LENGTH_NAME = 10;
 
-       /**
-        * Length of count
-        */
-       const LENGTH_COUNT = 20;
-
-       /**
-        * Length of position
-        */
-       const LENGTH_POSITION = 20;
-
-       /**
-        * Counter for total entries
-        */
-       private $totalEntries = 0;
-
-       /**
-        * Current seek position
-        */
-       private $seekPosition = 0;
-
-       /**
-        * Size of header
-        */
-       private $headerSize = 0;
-
-       /**
-        * File header
-        */
-       private $header = array();
-
-       /**
-        * Seek positions for gaps ("fragmentation")
-        */
-       private $gaps = array();
-
-       /**
-        * Seek positions for damaged entries (e.g. mismatching hash sum, ...)
-        */
-       private $damagedEntries = array();
-
        /**
         * Protected constructor
         *
@@ -98,7 +58,7 @@ class BaseFileStack extends BaseStacker {
                parent::__construct($className);
 
                // Calculate header size
-               $this->headerSize = (
+               $this->setHeaderSize(
                        strlen(self::STACK_MAGIC) +
                        strlen(self::SEPARATOR_HEADER_DATA) +
                        self::LENGTH_COUNT +
@@ -111,100 +71,23 @@ class BaseFileStack extends BaseStacker {
                $this->initCountersGapsArray();
        }
 
-       /**
-        * Initializes counter for valid entries, arrays for damaged entries and
-        * an array for gap seek positions. If you call this method on your own,
-        * please re-analyze the file structure. So you are better to call
-        * analyzeStackFile() instead of this method.
-        *
-        * @return      void
-        */
-       private function initCountersGapsArray () {
-               // Init counter and seek position
-               $this->setCounter(0);
-               $this->setSeekPosition(0);
-
-               // Init arrays
-               $this->gaps = array();
-               $this->damagedEntries = array();
-       }
-
-       /**
-        * Getter for total entries
-        *
-        * @return      $totalEntries   Total entries in this stack
-        */
-       private final function getCounter () {
-               // Get it
-               return $this->totalEntries;
-       }
-
-       /**
-        * Increment counter
-        *
-        * @return      void
-        */
-       private final function incrementCounter () {
-               // Get it
-               $this->totalEntries++;
-       }
-
-       /**
-        * Getter for seek position
-        *
-        * @return      $seekPosition   Current seek position (stored here in object)
-        */
-       private final function getSeekPosition () {
-               // Get it
-               return $this->seekPosition;
-       }
-
-       /**
-        * Setter for seek position
-        *
-        * @param       $seekPosition   Current seek position (stored here in object)
-        * @return      void
-        */
-       private final function setSeekPosition ($seekPosition) {
-               // And set it
-               $this->seekPosition = $seekPosition;
-       }
-
-       /**
-        * Updates seekPosition attribute from file to avoid to much access on file.
-        *
-        * @return      void
-        */
-       private function updateSeekPosition () {
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__));
-
-               // Get key (= seek position)
-               $seekPosition = $this->getIteratorInstance()->key();
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Setting seekPosition=%s', __METHOD__, __LINE__, $seekPosition));
-
-               // And set it here
-               $this->setSeekPosition($seekPosition);
-
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__));
-       }
-
        /**
         * Reads the file header
         *
         * @return      void
         */
-       private function readFileHeader () {
+       protected function readFileHeader () {
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__));
 
                // First rewind to beginning as the header sits at the beginning ...
                $this->getIteratorInstance()->rewind();
 
                // Then read it (see constructor for calculation)
-               $data = $this->getIteratorInstance()->read($this->headerSize);
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Read %d bytes (%d wanted).', __METHOD__, __LINE__, strlen($data), $this->headerSize));
+               $data = $this->getIteratorInstance()->read($this->getHeaderSize());
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Read %d bytes (%d wanted).', __METHOD__, __LINE__, strlen($data), $this->getHeaderSize()));
 
                // Have all requested bytes been read?
-               assert(strlen($data) == $this->headerSize);
+               assert(strlen($data) == $this->getHeaderSize());
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__));
 
                // Last character must be the separator
@@ -221,116 +104,43 @@ class BaseFileStack extends BaseStacker {
                /*
                 * Now split it:
                 *
-                * 0 => Magic
-                * 1 => Total entries
-                * 2 => Current seek position
+                * 0 => magic
+                * 1 => total entries
+                * 2 => current seek position
                 */
-               $this->header = explode(chr(self::SEPARATOR_HEADER_DATA), $data);
+               $header = explode(chr(self::SEPARATOR_HEADER_DATA), $data);
+
+               // Set header here
+               $this->setHeader($header);
 
                // Check if the array has only 3 elements
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] header(%d)=%s', __METHOD__, __LINE__, count($this->header), print_r($this->header, TRUE)));
-               assert(count($this->header) == 3);
+               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] header(%d)=%s', __METHOD__, __LINE__, count($header), print_r($header, TRUE)));
+               assert(count($header) == 3);
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__));
 
                // Check magic
-               assert($this->header[0] == self::STACK_MAGIC);
+               assert($header[0] == self::STACK_MAGIC);
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__));
 
                // Check length of count and seek position
-               assert(strlen($this->header[1]) == self::LENGTH_COUNT);
+               assert(strlen($header[1]) == self::LENGTH_COUNT);
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__));
-               assert(strlen($this->header[2]) == self::LENGTH_POSITION);
+               assert(strlen($header[2]) == self::LENGTH_POSITION);
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__));
 
                // Decode count and seek position
-               $this->header[1] = hex2bin($this->header[1]);
-               $this->header[2] = hex2bin($this->header[2]);
+               $header[1] = hex2bin($header[1]);
+               $header[2] = hex2bin($header[2]);
 
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__));
        }
 
-       /**
-        * Checks whether the file header is initialized
-        *
-        * @return      $isInitialized  Whether the file header is initialized
-        */
-       private function isFileHeaderInitialized () {
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__));
-               // Default is not initialized
-               $isInitialized = FALSE;
-
-               // Is the file initialized?
-               if ($this->isFileInitialized()) {
-                       // Some bytes has been written, so rewind to start of it.
-                       $rewindStatus = $this->getIteratorInstance()->rewind();
-                       //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] rewindStatus=%s', __METHOD__, __LINE__, $rewindStatus));
-
-                       // Is the rewind() call successfull?
-                       if ($rewindStatus != 1) {
-                               // Something bad happened
-                               self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Could not rewind().', __METHOD__, __LINE__));
-                       } // END - if
-
-                       // Read file header
-                       $this->readFileHeader();
-
-                       // The above method does already check the header
-                       $isInitialized = TRUE;
-               } // END - if
-
-               // Return result
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] isInitialized=%d - EXIT!', __METHOD__, __LINE__, intval($isInitialized)));
-               return $isInitialized;
-       }
-
-       /**
-        * Checks whether the file-based stack has been initialized
-        *
-        * @return      $isInitialized          Whether the file's size is zero
-        */
-       private function isFileInitialized () {
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__));
-
-               // Get it from iterator which holds the pointer instance. If FALSE is returned
-               $fileSize = $this->getIteratorInstance()->size();
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] fileSize=%s', __METHOD__, __LINE__, $fileSize));
-
-               /*
-                * The returned file size should not be FALSE or NULL as this means
-                * that the pointer class does not work correctly.
-                */
-               assert(is_int($fileSize));
-
-               // Is more than 0 returned?
-               $isInitialized = ($fileSize > 0);
-
-               // Return result
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] isInitialized=%d - EXIT!', __METHOD__, __LINE__, intval($isInitialized)));
-               return $isInitialized;
-       }
-
-       /**
-        * Creates the file-stack's header
-        *
-        * @return      void
-        */
-       private function createFileHeader () {
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__));
-               // The file's header should not be initialized here
-               assert(!$this->isFileHeaderInitialized());
-
-               // Simple flush file header which will create it.
-               $this->flushFileHeader();
-
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!!', __METHOD__, __LINE__));
-       }
-
        /**
         * Flushes the file header
         *
         * @return      void
         */
-       private function flushFileHeader () {
+       protected function flushFileHeader () {
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__));
 
                // Put all informations together
@@ -355,63 +165,7 @@ class BaseFileStack extends BaseStacker {
                );
 
                // Write it to disk (header is always at seek position 0)
-               $this->writeData(0, $header);
-
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__));
-       }
-
-       /**
-        * Writes data at given position
-        *
-        * @param       $seekPosition   Seek position
-        * @param       $data                   Data to be written
-        * @return      void
-        */
-       private function writeData ($seekPosition, $data) {
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] seekPosition=%s,data()=%s - CALLED!', __METHOD__, __LINE__, $seekPosition, strlen($data)));
-
-               // Write data at given position
-               $this->getIteratorInstance()->writeAtPosition($seekPosition, $data);
-
-               // Update seek position
-               $this->updateSeekPosition();
-
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__));
-       }
-
-       /**
-        * Pre-allocates file (if enabled) with some space for later faster write access.
-        *
-        * @return      void
-        */
-       private function preAllocateFile () {
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__));
-
-               // Is it enabled?
-               if ($this->getConfigInstance()->getConfigEntry('file_stack_pre_allocate_enabled') != 'Y') {
-                       // Not enabled
-                       self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Not pre-allocating stack file.', __METHOD__, __LINE__));
-
-                       // Don't continue here.
-                       return;
-               } // END - if
-
-               // Message to user
-               self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Pre-allocating stack file ...', __METHOD__, __LINE__));
-
-               /*
-                * Calculate minimum length for one entry:
-                * minimum length = hash length + separator + name + minimum entry size = ?? + 1 + 10 + 1 = ??
-                */
-               $minLengthEntry = self::getHashLength() + strlen(self::SEPARATOR_HASH_NAME) + self::LENGTH_NAME + 1;
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] minLengthEntry=%s', __METHOD__, __LINE__, $minLengthEntry));
-
-               // Calulcate seek position
-               $seekPosition = $minLengthEntry * $this->getConfigInstance()->getConfigEntry('file_stack_pre_allocate_count');
-               //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] seekPosition=%s', __METHOD__, __LINE__, $seekPosition));
-
-               // Now simply write a NUL there. This will pre-allocate the file.
-               $this->writeData($seekPosition, chr(0));
+               $this->writeData(0, $header, FALSE);
 
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__));
        }
@@ -423,7 +177,7 @@ class BaseFileStack extends BaseStacker {
         *
         * @return      void
         */
-       private function analyzeStackFile () {
+       private function analyzeFile () {
                //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__));
 
                // Make sure the file is initialized
@@ -442,14 +196,16 @@ class BaseFileStack extends BaseStacker {
         * Initializes this file-based stack.
         *
         * @param       $fileName       File name of this stack
+        * @param       $type           Type of this stack (e.g. url_source for URL sources)
         * @return      void
+        * @todo        Currently the stack file is not cached, please implement a memory-handling class and if enough RAM is found, cache the whole stack file.
         */
-       protected function initFileStack ($fileName) {
-               // Get a file i/o pointer instance for stack file
-               $pointerInstance = ObjectFactory::createObjectByConfiguredName('file_raw_input_output_class', array($fileName));
+       protected function initFileStack ($fileName, $type) {
+               // Get a stack file instance
+               $fileInstance = ObjectFactory::createObjectByConfiguredName('stack_file_class', array($fileName));
 
                // Get iterator instance
-               $iteratorInstance = ObjectFactory::createObjectByConfiguredName('file_io_iterator_class', array($pointerInstance));
+               $iteratorInstance = ObjectFactory::createObjectByConfiguredName('file_io_iterator_class', array($fileInstance));
 
                // Is the instance implementing the right interface?
                assert($iteratorInstance instanceof SeekableWritableFileIterator);
@@ -463,25 +219,38 @@ class BaseFileStack extends BaseStacker {
                        $this->createFileHeader();
 
                        // And pre-allocate a bit
-                       $this->preAllocateFile();
+                       $this->preAllocateFile('file_stack');
                } // END - if
 
                // Load the file header
                $this->readFileHeader();
 
                // Count all entries in file
-               $this->analyzeStackFile();
+               $this->analyzeFile();
 
                /*
                 * Get stack index instance. This can be used for faster
                 * "defragmentation" and startup.
                 */
-               $indexInstance = FileStackIndexFactory::createFileStackIndex($fileName);
+               $indexInstance = FileStackIndexFactory::createFileStackIndexInstance($fileName, $type);
 
                // And set it here
                $this->setIndexInstance($indexInstance);
        }
 
+       /**
+        * Determines the smallest possible length of a single file entry.
+        *
+        * @return      $length         Minimum length for one entry
+        */
+       protected function caluclateMinimumFileEntryLength () {
+               /*
+                * Hash length + hash/name separator + fixed name length + 1
+                * (the 1 being the minimum entry size).
+                */
+               return self::getHashLength() + strlen(self::SEPARATOR_HASH_NAME) + self::LENGTH_NAME + 1;
+       }
+
        /**
         * Adds a value to given stack
         *