X-Git-Url: https://git.mxchange.org/?p=core.git;a=blobdiff_plain;f=inc%2Fclasses%2Fmain%2Fstacker%2Ffile%2Fclass_BaseFileStack.php;h=53afecbd50b5aede20952acbc1327a242ad3f141;hp=879f19d0187b09700380ccd3bdf086dbcc883b4a;hb=6b19898fa5c1cc332e83100ea41f55073ec20a8a;hpb=00dd4b10d1e78ce9b517a0595faa746343ca221c diff --git a/inc/classes/main/stacker/file/class_BaseFileStack.php b/inc/classes/main/stacker/file/class_BaseFileStack.php index 879f19d0..53afecbd 100644 --- a/inc/classes/main/stacker/file/class_BaseFileStack.php +++ b/inc/classes/main/stacker/file/class_BaseFileStack.php @@ -47,36 +47,6 @@ class BaseFileStack extends BaseStacker { */ const LENGTH_NAME = 10; - /** - * Length of count - */ - const LENGTH_COUNT = 20; - - /** - * Length of position - */ - const LENGTH_POSITION = 20; - - /** - * Counter for total entries - */ - private $totalEntries = 0; - - /** - * Current seek position - */ - private $seekPosition = 0; - - /** - * Size of header - */ - private $headerSize = 0; - - /** - * File header - */ - private $header = array(); - /** * Protected constructor * @@ -88,7 +58,7 @@ class BaseFileStack extends BaseStacker { parent::__construct($className); // Calculate header size - $this->headerSize = ( + $this->setHeaderSize( strlen(self::STACK_MAGIC) + strlen(self::SEPARATOR_HEADER_DATA) + self::LENGTH_COUNT + @@ -96,65 +66,9 @@ class BaseFileStack extends BaseStacker { self::LENGTH_POSITION + strlen(self::SEPARATOR_HEADER_ENTRIES) ); - } - - /** - * Getter for total entries - * - * @return $totalEntries Total entries in this stack - */ - private function getCounter () { - // Get it - return $this->totalEntries; - } - - /** - * Increment counter - * - * @return void - */ - private function incrementCounter () { - // Get it - $this->totalEntries++; - } - /** - * Getter for seek position - * - * @return $seekPosition Current seek position (stored here in object) - */ - private function getSeekPosition () { - // Get it - return $this->seekPosition; - } - - /** - * Setter for seek position - * - * @param $seekPosition Current seek position (stored here in object) - * @return void - */ - private function setSeekPosition ($seekPosition) { - // And set it - $this->seekPosition = $seekPosition; - } - - /** - * Updates seekPosition attribute from file to avoid to much access on file. - * - * @return void - */ - private function updateSeekPosition () { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - - // Get key (= seek position) - $seekPosition = $this->getIteratorInstance()->key(); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Setting seekPosition=%s', __METHOD__, __LINE__, $seekPosition)); - - // And set it here - $this->setSeekPosition($seekPosition); - - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); + // Init counters and gaps array + $this->initCountersGapsArray(); } /** @@ -162,18 +76,18 @@ class BaseFileStack extends BaseStacker { * * @return void */ - private function readFileHeader () { + protected function readFileHeader () { //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); // First rewind to beginning as the header sits at the beginning ... $this->getIteratorInstance()->rewind(); // Then read it (see constructor for calculation) - $data = $this->getIteratorInstance()->read($this->headerSize); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Read %d bytes (%d wanted).', __METHOD__, __LINE__, strlen($data), $this->headerSize)); + $data = $this->getIteratorInstance()->read($this->getHeaderSize()); + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Read %d bytes (%d wanted).', __METHOD__, __LINE__, strlen($data), $this->getHeaderSize())); // Have all requested bytes been read? - assert(strlen($data) == $this->headerSize); + assert(strlen($data) == $this->getHeaderSize()); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); // Last character must be the separator @@ -190,116 +104,43 @@ class BaseFileStack extends BaseStacker { /* * Now split it: * - * 0 => Magic - * 1 => Total entries - * 2 => Current seek position + * 0 => magic + * 1 => total entries + * 2 => current seek position */ - $this->header = explode(chr(self::SEPARATOR_HEADER_DATA), $data); + $header = explode(chr(self::SEPARATOR_HEADER_DATA), $data); + + // Set header here + $this->setHeader($header); // Check if the array has only 3 elements - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] header(%d)=%s', __METHOD__, __LINE__, count($this->header), print_r($this->header, TRUE))); - assert(count($this->header) == 3); + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] header(%d)=%s', __METHOD__, __LINE__, count($header), print_r($header, TRUE))); + assert(count($header) == 3); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); // Check magic - assert($this->header[0] == self::STACK_MAGIC); + assert($header[0] == self::STACK_MAGIC); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); // Check length of count and seek position - assert(strlen($this->header[1]) == self::LENGTH_COUNT); + assert(strlen($header[1]) == self::LENGTH_COUNT); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); - assert(strlen($this->header[2]) == self::LENGTH_POSITION); + assert(strlen($header[2]) == self::LENGTH_POSITION); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); // Decode count and seek position - $this->header[1] = hex2bin($this->header[1]); - $this->header[2] = hex2bin($this->header[2]); + $header[1] = hex2bin($header[1]); + $header[2] = hex2bin($header[2]); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); } - /** - * Checks whether the file header is initialized - * - * @return $isInitialized Whether the file header is initialized - */ - private function isFileHeaderInitialized () { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - // Default is not initialized - $isInitialized = FALSE; - - // Is the file initialized? - if ($this->isFileInitialized()) { - // Some bytes has been written, so rewind to start of it. - $rewindStatus = $this->getIteratorInstance()->rewind(); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] rewindStatus=%s', __METHOD__, __LINE__, $rewindStatus)); - - // Is the rewind() call successfull? - if ($rewindStatus != 1) { - // Something bad happened - self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Could not rewind().', __METHOD__, __LINE__)); - } // END - if - - // Read file header - $this->readFileHeader(); - - // The above method does already check the header - $isInitialized = TRUE; - } // END - if - - // Return result - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] isInitialized=%d - EXIT!', __METHOD__, __LINE__, intval($isInitialized))); - return $isInitialized; - } - - /** - * Checks whether the file-based stack has been initialized - * - * @return $isInitialized Whether the file's size is zero - */ - private function isFileInitialized () { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - - // Get it from iterator which holds the pointer instance. If FALSE is returned - $fileSize = $this->getIteratorInstance()->size(); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] fileSize=%s', __METHOD__, __LINE__, $fileSize)); - - /* - * The returned file size should not be FALSE or NULL as this means - * that the pointer class does not work correctly. - */ - assert(is_int($fileSize)); - - // Is more than 0 returned? - $isInitialized = ($fileSize > 0); - - // Return result - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] isInitialized=%d - EXIT!', __METHOD__, __LINE__, intval($isInitialized))); - return $isInitialized; - } - - /** - * Creates the file-stack's header - * - * @return void - */ - private function createFileHeader () { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - // The file's header should not be initialized here - assert(!$this->isFileHeaderInitialized()); - - // Simple flush file header which will create it. - $this->flushFileHeader(); - - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!!', __METHOD__, __LINE__)); - } - /** * Flushes the file header * * @return void */ - private function flushFileHeader () { + protected function flushFileHeader () { //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); // Put all informations together @@ -324,63 +165,29 @@ class BaseFileStack extends BaseStacker { ); // Write it to disk (header is always at seek position 0) - $this->writeData(0, $header); - - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); - } - - /** - * Writes data at given position - * - * @param $seekPosition Seek position - * @param $data Data to be written - * @return void - */ - private function writeData ($seekPosition, $data) { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] seekPosition=%s,data()=%s - CALLED!', __METHOD__, __LINE__, $seekPosition, strlen($data))); - - // Write data at given position - $this->getIteratorInstance()->writeAtPosition($seekPosition, $data); - - // Update seek position - $this->updateSeekPosition(); + $this->writeData(0, $header, FALSE); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); } /** - * Pre-allocates file (if enabled) with some space for later faster write access. + * Analyzes entries in stack file. This will count all found (and valid) + * entries, mark invalid as damaged and count gaps ("fragmentation"). If + * only gaps are found, the file is considered as "virgin" (no entries). * * @return void */ - private function preAllocateFile () { + private function analyzeFile () { //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - // Is it enabled? - if ($this->getConfigInstance()->getConfigEntry('file_stack_pre_allocate_enabled') != 'Y') { - // Not enabled - self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Not pre-allocating stack file.', __METHOD__, __LINE__)); + // Make sure the file is initialized + assert($this->isFileInitialized()); - // Don't continue here. - return; - } // END - if + // Init counters and gaps array + $this->initCountersGapsArray(); - // Message to user - self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Pre-allocating stack file ...', __METHOD__, __LINE__)); - - /* - * Calculate minimum length for one entry: - * minimum length = hash length + separator + name + minimum entry size = ?? + 1 + 10 + 1 = ?? - */ - $minLengthEntry = self::getHashLength() + strlen(self::SEPARATOR_HASH_NAME) + self::LENGTH_NAME + 1; - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] minLengthEntry=%s', __METHOD__, __LINE__, $minLengthEntry)); - - // Calulcate seek position - $seekPosition = $minLengthEntry * $this->getConfigInstance()->getConfigEntry('file_stack_pre_allocate_count'); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] seekPosition=%s', __METHOD__, __LINE__, $seekPosition)); - - // Now simply write a NUL there. This will pre-allocate the file. - $this->writeData($seekPosition, chr(0)); + // Output message (as this may take some time) + self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Analyzing file structure ... (this may take some time)', __METHOD__, __LINE__)); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); } @@ -389,14 +196,16 @@ class BaseFileStack extends BaseStacker { * Initializes this file-based stack. * * @param $fileName File name of this stack + * @param $type Type of this stack (e.g. url_source for URL sources) * @return void + * @todo Currently the stack file is not cached, please implement a memory-handling class and if enough RAM is found, cache the whole stack file. */ - protected function initFileStack ($fileName) { - // Get a file i/o pointer instance - $pointerInstance = ObjectFactory::createObjectByConfiguredName('file_raw_input_output_class', array($fileName)); + protected function initFileStack ($fileName, $type) { + // Get a stack file instance + $fileInstance = ObjectFactory::createObjectByConfiguredName('stack_file_class', array($fileName)); // Get iterator instance - $iteratorInstance = ObjectFactory::createObjectByConfiguredName('file_io_iterator_class', array($pointerInstance)); + $iteratorInstance = ObjectFactory::createObjectByConfiguredName('file_io_iterator_class', array($fileInstance)); // Is the instance implementing the right interface? assert($iteratorInstance instanceof SeekableWritableFileIterator); @@ -410,11 +219,36 @@ class BaseFileStack extends BaseStacker { $this->createFileHeader(); // And pre-allocate a bit - $this->preAllocateFile(); + $this->preAllocateFile('file_stack'); } // END - if // Load the file header $this->readFileHeader(); + + // Count all entries in file + $this->analyzeFile(); + + /* + * Get stack index instance. This can be used for faster + * "defragmentation" and startup. + */ + $indexInstance = FileStackIndexFactory::createFileStackIndexInstance($fileName, $type); + + // And set it here + $this->setIndexInstance($indexInstance); + } + + /** + * Calculates minimum length for one entry + * + * @return $length Minimum length for one entry + */ + protected function caluclateMinimumFileEntryLength () { + // Calulcate it + $length = self::getHashLength() + strlen(self::SEPARATOR_HASH_NAME) + self::LENGTH_NAME + 1; + + // Return it + return $length; } /** @@ -524,7 +358,7 @@ class BaseFileStack extends BaseStacker { * @param $stackerName Name of the stack * @return $isFull Whether the stack is full */ - protected final function isStackFull ($stackerName) { + protected function isStackFull ($stackerName) { // File-based stacks will only run full if the disk space is low. // @TODO Please implement this, returning FALSE $isFull = FALSE; @@ -540,7 +374,7 @@ class BaseFileStack extends BaseStacker { * @return $isEmpty Whether the stack is empty * @throws NoStackerException If given stack is missing */ - public final function isStackEmpty ($stackerName) { + public function isStackEmpty ($stackerName) { // So, is the stack empty? $isEmpty = (($this->getStackCount($stackerName)) == 0); @@ -588,12 +422,8 @@ class BaseFileStack extends BaseStacker { * @return $count Size of stack (array count) */ public function getStackCount ($stackerName) { - // Now, count the array of entries - $this->partialStub('stackerName=' . $stackerName); - $count = 0; - - // Return result - return $count; + // Now, simply return the found count value, this must be up-to-date then! + return $this->getCounter(); } }