X-Git-Url: https://git.mxchange.org/?p=core.git;a=blobdiff_plain;f=inc%2Fclasses%2Fmain%2Fstacker%2Ffile%2Fclass_BaseFileStack.php;h=d16068d32d80ef3245a390e2f8ba0a626de8b49b;hp=4ff2dfc5179d3afd32462d8ac5c6a4c4e6eb2988;hb=4af01023fc4b9ffc4c7174264dbff53966aecc91;hpb=fa4a8357806244a39eb6e8dadf028190b03d34fb diff --git a/inc/classes/main/stacker/file/class_BaseFileStack.php b/inc/classes/main/stacker/file/class_BaseFileStack.php index 4ff2dfc5..d16068d3 100644 --- a/inc/classes/main/stacker/file/class_BaseFileStack.php +++ b/inc/classes/main/stacker/file/class_BaseFileStack.php @@ -27,66 +27,6 @@ class BaseFileStack extends BaseStacker { */ const STACK_MAGIC = 'STACKv0.1'; - /** - * Separator for header data - */ - const SEPARATOR_HEADER_DATA = 0x01; - - /** - * Separator header->entries - */ - const SEPARATOR_HEADER_ENTRIES = 0x02; - - /** - * Separator hash->name - */ - const SEPARATOR_HASH_NAME = 0x03; - - /** - * Length of name - */ - const LENGTH_NAME = 10; - - /** - * Length of count - */ - const LENGTH_COUNT = 20; - - /** - * Length of position - */ - const LENGTH_POSITION = 20; - - /** - * Counter for total entries - */ - private $totalEntries = 0; - - /** - * Current seek position - */ - private $seekPosition = 0; - - /** - * Size of header - */ - private $headerSize = 0; - - /** - * File header - */ - private $header = array(); - - /** - * Seek positions for gaps ("fragmentation") - */ - private $gaps = array(); - - /** - * Seek positions for damaged entries (e.g. mismatching hash sum, ...) - */ - private $damagedEntries = array(); - /** * Protected constructor * @@ -98,7 +38,7 @@ class BaseFileStack extends BaseStacker { parent::__construct($className); // Calculate header size - $this->headerSize = ( + $this->setHeaderSize( strlen(self::STACK_MAGIC) + strlen(self::SEPARATOR_HEADER_DATA) + self::LENGTH_COUNT + @@ -111,100 +51,23 @@ class BaseFileStack extends BaseStacker { $this->initCountersGapsArray(); } - /** - * Initializes counter for valid entries, arrays for damaged entries and - * an array for gap seek positions. If you call this method on your own, - * please re-analyze the file structure. So you are better to call - * analyzeStackFile() instead of this method. - * - * @return void - */ - private function initCountersGapsArray () { - // Init counter and seek position - $this->setCounter(0); - $this->setSeekPosition(0); - - // Init arrays - $this->gaps = array(); - $this->damagedEntries = array(); - } - - /** - * Getter for total entries - * - * @return $totalEntries Total entries in this stack - */ - private final function getCounter () { - // Get it - return $this->totalEntries; - } - - /** - * Increment counter - * - * @return void - */ - private final function incrementCounter () { - // Get it - $this->totalEntries++; - } - - /** - * Getter for seek position - * - * @return $seekPosition Current seek position (stored here in object) - */ - private final function getSeekPosition () { - // Get it - return $this->seekPosition; - } - - /** - * Setter for seek position - * - * @param $seekPosition Current seek position (stored here in object) - * @return void - */ - private final function setSeekPosition ($seekPosition) { - // And set it - $this->seekPosition = $seekPosition; - } - - /** - * Updates seekPosition attribute from file to avoid to much access on file. - * - * @return void - */ - private function updateSeekPosition () { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - - // Get key (= seek position) - $seekPosition = $this->getIteratorInstance()->key(); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Setting seekPosition=%s', __METHOD__, __LINE__, $seekPosition)); - - // And set it here - $this->setSeekPosition($seekPosition); - - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); - } - /** * Reads the file header * * @return void */ - private function readFileHeader () { + protected function readFileHeader () { //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); // First rewind to beginning as the header sits at the beginning ... $this->getIteratorInstance()->rewind(); // Then read it (see constructor for calculation) - $data = $this->getIteratorInstance()->read($this->headerSize); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Read %d bytes (%d wanted).', __METHOD__, __LINE__, strlen($data), $this->headerSize)); + $data = $this->getIteratorInstance()->read($this->getHeaderSize()); + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Read %d bytes (%d wanted).', __METHOD__, __LINE__, strlen($data), $this->getHeaderSize())); // Have all requested bytes been read? - assert(strlen($data) == $this->headerSize); + assert(strlen($data) == $this->getHeaderSize()); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); // Last character must be the separator @@ -221,116 +84,43 @@ class BaseFileStack extends BaseStacker { /* * Now split it: * - * 0 => Magic - * 1 => Total entries - * 2 => Current seek position + * 0 => magic + * 1 => total entries + * 2 => current seek position */ - $this->header = explode(chr(self::SEPARATOR_HEADER_DATA), $data); + $header = explode(chr(self::SEPARATOR_HEADER_DATA), $data); + + // Set header here + $this->setHeader($header); // Check if the array has only 3 elements - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] header(%d)=%s', __METHOD__, __LINE__, count($this->header), print_r($this->header, TRUE))); - assert(count($this->header) == 3); + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] header(%d)=%s', __METHOD__, __LINE__, count($header), print_r($header, TRUE))); + assert(count($header) == 3); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); // Check magic - assert($this->header[0] == self::STACK_MAGIC); + assert($header[0] == self::STACK_MAGIC); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); // Check length of count and seek position - assert(strlen($this->header[1]) == self::LENGTH_COUNT); + assert(strlen($header[1]) == self::LENGTH_COUNT); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); - assert(strlen($this->header[2]) == self::LENGTH_POSITION); + assert(strlen($header[2]) == self::LENGTH_POSITION); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Passed assert().', __METHOD__, __LINE__)); // Decode count and seek position - $this->header[1] = hex2bin($this->header[1]); - $this->header[2] = hex2bin($this->header[2]); + $header[1] = hex2bin($header[1]); + $header[2] = hex2bin($header[2]); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); } - /** - * Checks whether the file header is initialized - * - * @return $isInitialized Whether the file header is initialized - */ - private function isFileHeaderInitialized () { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - // Default is not initialized - $isInitialized = FALSE; - - // Is the file initialized? - if ($this->isFileInitialized()) { - // Some bytes has been written, so rewind to start of it. - $rewindStatus = $this->getIteratorInstance()->rewind(); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] rewindStatus=%s', __METHOD__, __LINE__, $rewindStatus)); - - // Is the rewind() call successfull? - if ($rewindStatus != 1) { - // Something bad happened - self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Could not rewind().', __METHOD__, __LINE__)); - } // END - if - - // Read file header - $this->readFileHeader(); - - // The above method does already check the header - $isInitialized = TRUE; - } // END - if - - // Return result - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] isInitialized=%d - EXIT!', __METHOD__, __LINE__, intval($isInitialized))); - return $isInitialized; - } - - /** - * Checks whether the file-based stack has been initialized - * - * @return $isInitialized Whether the file's size is zero - */ - private function isFileInitialized () { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - - // Get it from iterator which holds the pointer instance. If FALSE is returned - $fileSize = $this->getIteratorInstance()->size(); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] fileSize=%s', __METHOD__, __LINE__, $fileSize)); - - /* - * The returned file size should not be FALSE or NULL as this means - * that the pointer class does not work correctly. - */ - assert(is_int($fileSize)); - - // Is more than 0 returned? - $isInitialized = ($fileSize > 0); - - // Return result - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] isInitialized=%d - EXIT!', __METHOD__, __LINE__, intval($isInitialized))); - return $isInitialized; - } - - /** - * Creates the file-stack's header - * - * @return void - */ - private function createFileHeader () { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - // The file's header should not be initialized here - assert(!$this->isFileHeaderInitialized()); - - // Simple flush file header which will create it. - $this->flushFileHeader(); - - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!!', __METHOD__, __LINE__)); - } - /** * Flushes the file header * * @return void */ - private function flushFileHeader () { + protected function flushFileHeader () { //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); // Put all informations together @@ -355,63 +145,7 @@ class BaseFileStack extends BaseStacker { ); // Write it to disk (header is always at seek position 0) - $this->writeData(0, $header); - - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); - } - - /** - * Writes data at given position - * - * @param $seekPosition Seek position - * @param $data Data to be written - * @return void - */ - private function writeData ($seekPosition, $data) { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] seekPosition=%s,data()=%s - CALLED!', __METHOD__, __LINE__, $seekPosition, strlen($data))); - - // Write data at given position - $this->getIteratorInstance()->writeAtPosition($seekPosition, $data); - - // Update seek position - $this->updateSeekPosition(); - - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); - } - - /** - * Pre-allocates file (if enabled) with some space for later faster write access. - * - * @return void - */ - private function preAllocateFile () { - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); - - // Is it enabled? - if ($this->getConfigInstance()->getConfigEntry('file_stack_pre_allocate_enabled') != 'Y') { - // Not enabled - self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Not pre-allocating stack file.', __METHOD__, __LINE__)); - - // Don't continue here. - return; - } // END - if - - // Message to user - self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Pre-allocating stack file ...', __METHOD__, __LINE__)); - - /* - * Calculate minimum length for one entry: - * minimum length = hash length + separator + name + minimum entry size = ?? + 1 + 10 + 1 = ?? - */ - $minLengthEntry = self::getHashLength() + strlen(self::SEPARATOR_HASH_NAME) + self::LENGTH_NAME + 1; - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] minLengthEntry=%s', __METHOD__, __LINE__, $minLengthEntry)); - - // Calulcate seek position - $seekPosition = $minLengthEntry * $this->getConfigInstance()->getConfigEntry('file_stack_pre_allocate_count'); - //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] seekPosition=%s', __METHOD__, __LINE__, $seekPosition)); - - // Now simply write a NUL there. This will pre-allocate the file. - $this->writeData($seekPosition, chr(0)); + $this->writeData(0, $header, FALSE); //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); } @@ -423,7 +157,7 @@ class BaseFileStack extends BaseStacker { * * @return void */ - private function analyzeStackFile () { + private function analyzeFile () { //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] CALLED!', __METHOD__, __LINE__)); // Make sure the file is initialized @@ -435,6 +169,21 @@ class BaseFileStack extends BaseStacker { // Output message (as this may take some time) self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Analyzing file structure ... (this may take some time)', __METHOD__, __LINE__)); + // First rewind to the begining + $this->getIteratorInstance()->rewind(); + + // Then try to load all entries + while ($this->getIteratorInstance()->valid()) { + // Go to next entry + $this->getIteratorInstance()->next(); + + // Get current entry + $current = $this->getIteratorInstance()->current(); + + // Simply output it + self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] current(%s)=%s', __METHOD__, __LINE__, strlen($current), print_r($current, TRUE))); + } // END - while + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] EXIT!', __METHOD__, __LINE__)); } @@ -444,13 +193,14 @@ class BaseFileStack extends BaseStacker { * @param $fileName File name of this stack * @param $type Type of this stack (e.g. url_source for URL sources) * @return void + * @todo Currently the stack file is not cached, please implement a memory-handling class and if enough RAM is found, cache the whole stack file. */ protected function initFileStack ($fileName, $type) { - // Get a file i/o pointer instance for stack file - $pointerInstance = ObjectFactory::createObjectByConfiguredName('file_raw_input_output_class', array($fileName)); + // Get a stack file instance + $fileInstance = ObjectFactory::createObjectByConfiguredName('stack_file_class', array($fileName)); // Get iterator instance - $iteratorInstance = ObjectFactory::createObjectByConfiguredName('file_io_iterator_class', array($pointerInstance)); + $iteratorInstance = ObjectFactory::createObjectByConfiguredName('file_io_iterator_class', array($fileInstance, $this)); // Is the instance implementing the right interface? assert($iteratorInstance instanceof SeekableWritableFileIterator); @@ -464,14 +214,14 @@ class BaseFileStack extends BaseStacker { $this->createFileHeader(); // And pre-allocate a bit - $this->preAllocateFile(); + $this->preAllocateFile('file_stack'); } // END - if // Load the file header $this->readFileHeader(); // Count all entries in file - $this->analyzeStackFile(); + $this->analyzeFile(); /* * Get stack index instance. This can be used for faster @@ -657,6 +407,19 @@ class BaseFileStack extends BaseStacker { // Now, simply return the found count value, this must be up-to-date then! return $this->getCounter(); } + + /** + * Calculates minimum length for one entry/block + * + * @return $length Minimum length for one entry/block + */ + public function caluclateMinimumBlockLength () { + // Calulcate it + $length = self::getHashLength() + strlen(chr(self::SEPARATOR_HASH_NAME)) + self::LENGTH_NAME + 1 + strlen(self::getBlockSeparator()); + + // Return it + return $length; + } } // [EOF]