From 1a91dabdfed365947d1ce11675aacae9d424edff Mon Sep 17 00:00:00 2001 From: Roland Haeder Date: Thu, 5 Mar 2015 02:56:56 +0100 Subject: [PATCH] Continued CSV parsing: - Introduced readCsvFileLine() which reads a line from a CSV file and parses it to an indexed array - Introduced readLine() which will read a line (not limited) from a text file - Method read() will now work without parameters (depending on implementation, e.g. binary files must always be read with a buffer length) - Other improvements - TODOs.txt updated MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Roland Häder --- docs/TODOs.txt | 19 +-- .../io/file/csv/class_CsvInputStreamer.php | 7 ++ .../io/pointer/class_InputPointer.php | 19 ++- .../binary/class_BaseBinaryFile.php | 5 +- .../class_FrameworkRawFileInputPointer.php | 19 ++- .../class_FrameworkTextFileInputPointer.php | 50 +++++--- .../class_FrameworkFileInputOutputPointer.php | 25 +++- .../text/input/class_BaseInputTextFile.php | 8 ++ .../text/input/csv/class_CsvInputFile.php | 115 ++++++++++++++++++ .../main/iterator/file/class_FileIterator.php | 2 +- 10 files changed, 224 insertions(+), 45 deletions(-) diff --git a/docs/TODOs.txt b/docs/TODOs.txt index d57865a4..ae078e4d 100644 --- a/docs/TODOs.txt +++ b/docs/TODOs.txt @@ -1,15 +1,16 @@ ### WARNING: THIS FILE IS AUTO-GENERATED BY ./todo-builder.sh ### ### DO NOT EDIT THIS FILE. ### +./inc/classes.php:10: * @todo Minimize these includes ./inc/classes/exceptions/main/class_MissingMethodException.php:13: * @todo Try to rewrite user/guest login classes and mark this exception as deprecated ./inc/classes/exceptions/main/class_NoConfigEntryException.php:10: * @todo Rename this class to NoFoundEntryException ./inc/classes/interfaces/class_FrameworkInterface.php:11: * @todo Find a better name for this interface ./inc/classes/interfaces/criteria/extended/class_LocalSearchCriteria.php:30: * @todo Find a nice casting here. 
(int) allows until and including 32766. ./inc/classes/interfaces/criteria/extended/class_LocalSearchCriteria.php:54: * @todo Find a nice casting here. (int) allows until and including 32766. -./inc/classes/main/class_BaseFrameworkSystem.php:1927: * @todo Write a logging mechanism for productive mode -./inc/classes/main/class_BaseFrameworkSystem.php:1942: // @TODO Finish this part! -./inc/classes/main/class_BaseFrameworkSystem.php:240: // @todo Try to clean these constants up -./inc/classes/main/class_BaseFrameworkSystem.php:465: // @TODO __CLASS__ does always return BaseFrameworkSystem but not the extending (=child) class -./inc/classes/main/class_BaseFrameworkSystem.php:539: * @todo SearchableResult and UpdateableResult shall have a super interface to use here +./inc/classes/main/class_BaseFrameworkSystem.php:1953: * @todo Write a logging mechanism for productive mode +./inc/classes/main/class_BaseFrameworkSystem.php:1968: // @TODO Finish this part! +./inc/classes/main/class_BaseFrameworkSystem.php:245: // @todo Try to clean these constants up +./inc/classes/main/class_BaseFrameworkSystem.php:470: // @TODO __CLASS__ does always return BaseFrameworkSystem but not the extending (=child) class +./inc/classes/main/class_BaseFrameworkSystem.php:544: * @todo SearchableResult and UpdateableResult shall have a super interface to use here ./inc/classes/main/commands/web/class_WebLoginAreaCommand.php:64: * @todo Add some stuff here: Some personal data, app/game related data ./inc/classes/main/commands/web/class_WebProblemCommand.php:58: * @todo 0% done ./inc/classes/main/commands/web/class_WebStatusCommand.php:58: * @todo 0% done @@ -31,12 +32,12 @@ ./inc/classes/main/criteria/search/class_SearchCriteria.php:102: * @todo Find a nice casting here. (int) allows until and including 32766. ./inc/classes/main/criteria/search/class_SearchCriteria.php:70: * @todo Find a nice casting here. (int) allows until and including 32766. 
./inc/classes/main/database/databases/class_LocalFileDatabase.php:327: * @todo Do some checks on the database directory and files here -./inc/classes/main/database/databases/class_LocalFileDatabase.php:613: * @todo Add more generic non-public data for removal +./inc/classes/main/database/databases/class_LocalFileDatabase.php:616: * @todo Add more generic non-public data for removal ./inc/classes/main/decorator/template/class_XmlRewriterTemplateDecorator.php:427: * @todo Find something useful with this! ./inc/classes/main/discovery/payment/class_LocalPaymentDiscovery.php:85: * @todo 0% done -./inc/classes/main/file_directories/class_BaseFileIo.php:162: * @todo Handle seekStatus ./inc/classes/main/file_directories/class_BaseFile.php:135: * @todo ~10% done? ./inc/classes/main/file_directories/class_BaseFile.php:148: * @todo Handle seekStatus +./inc/classes/main/file_directories/class_BaseFileIo.php:162: * @todo Handle seekStatus ./inc/classes/main/file_directories/directory/class_FrameworkDirectoryPointer.php:68: * @todo Get rid of inConstructor, could be old-lost code. ./inc/classes/main/file_directories/io_stream/class_FileIoStream.php:270: * @todo 0% done ./inc/classes/main/file_directories/io_stream/class_FileIoStream.php:74: * @todo This method needs heavy rewrite @@ -108,7 +109,6 @@ ./inc/classes/middleware/compressor/class_CompressorChannel.php:103: // @TODO Is there a configurable fall-back compressor needed, or is NullCompressor okay? ./inc/classes/middleware/debug/class_DebugMiddleware.php:113: // @TODO Initialization phase ./inc/classes/middleware/io/class_FileIoHandler.php:174: * @todo 0% done -./inc/classes.php:10: * @todo Minimize these includes ./inc/classes/third_party/api/wernisportal/class_WernisApi.php:10: * @todo Out-dated since 0.6-BETA ./inc/config/class_FrameworkConfiguration.php:115: * @todo This method encapsulates a deprecated PHP function and should be deprecated, too. 
./inc/config/class_FrameworkConfiguration.php:223: * @todo We have to add some more entries from $_SERVER here @@ -120,14 +120,15 @@ ./inc/loader/class_ClassLoader.php:319: /* @TODO: Do not exit here. */ ./inc/output.php:11: * @todo Minimize these includes ./inc/selector.php:11: * @todo Minimize these includes +./index.php:43: * @todo This method is old code and needs heavy rewrite and should be moved to ApplicationHelper ### ### DEPRECATION FOLLOWS: ### ### +./inc/classes.php:9: * @deprecated ./inc/classes/exceptions/main/class_MissingMethodException.php:14: * @deprecated Please do no longer use this exception ./inc/classes/interfaces/database/backend/class_DatabaseFrontendInterface.php:2:// @DEPRECATED ./inc/classes/interfaces/database/frontend/class_DatabaseFrontendInterface.php:2:// @DEPRECATED ./inc/classes/main/database/class_BaseDatabaseFrontend.php:2:// @DEPRECATED ./inc/classes/main/handler/class_BaseHandler.php:2:// @DEPRECATED ./inc/classes/main/handler/raw_data/class_BaseRawDataHandler.php:2:// @DEPRECATED -./inc/classes.php:9: * @deprecated ./inc/database.php:10: * @deprecated ./inc/hooks.php:2:// @DEPRECATED ./inc/includes.php:10: * @deprecated diff --git a/inc/classes/interfaces/io/file/csv/class_CsvInputStreamer.php b/inc/classes/interfaces/io/file/csv/class_CsvInputStreamer.php index 957f6775..8b72ad2d 100644 --- a/inc/classes/interfaces/io/file/csv/class_CsvInputStreamer.php +++ b/inc/classes/interfaces/io/file/csv/class_CsvInputStreamer.php @@ -22,6 +22,13 @@ * along with this program. If not, see . 
*/ interface CsvInputStreamer extends FileInputStreamer { + /** + * Reads a line from CSV file and returns it as an indexed array + * + * @param $columnSeparator Character to use separating columns + * @return $lineArray An indexed array with the read line + */ + function readCsvFileLine ($columnSeparator); } // [EOF] diff --git a/inc/classes/interfaces/io/pointer/class_InputPointer.php b/inc/classes/interfaces/io/pointer/class_InputPointer.php index eb032537..c38cdd5b 100644 --- a/inc/classes/interfaces/io/pointer/class_InputPointer.php +++ b/inc/classes/interfaces/io/pointer/class_InputPointer.php @@ -25,20 +25,27 @@ interface InputPointer extends StreamableInput, FilePointer { /** * Read data a file pointer * - * @return mixed The result of fread() - * @throws NullPointerException If the file pointer instance - * is not set by setPointer() - * @throws InvalidResourceException If there is being set + * @return $data Read data from file */ function readFromFile (); + /** + * Reads a line, maximum 4096 Bytes from current file pointer + * + * @return $data Read data from file + */ + function readLine (); + /** * Reads given amount of bytes from file. 
* - * @param $bytes Amount of bytes to read + * @param $bytes Amount of bytes to read or whole line (only text files) * @return $data Data read from file + * @throws NullPointerException If the file pointer instance + * is not set by setPointer() + * @throws InvalidResourceException If there is being set */ - function read ($bytes); + function read ($bytes = NULL); } // [EOF] diff --git a/inc/classes/main/file_directories/binary/class_BaseBinaryFile.php b/inc/classes/main/file_directories/binary/class_BaseBinaryFile.php index 8a338735..207f797d 100644 --- a/inc/classes/main/file_directories/binary/class_BaseBinaryFile.php +++ b/inc/classes/main/file_directories/binary/class_BaseBinaryFile.php @@ -575,7 +575,10 @@ class BaseBinaryFile extends BaseFile { * @param $bytes Amount of bytes to read * @return $data Data read from file */ - public function read ($bytes) { + public function read ($bytes = NULL) { + // $bytes shall be integer + assert(is_int($bytes)); + // Call pointer instance return $this->getPointerInstance()->read($bytes); } diff --git a/inc/classes/main/file_directories/input/raw/class_FrameworkRawFileInputPointer.php b/inc/classes/main/file_directories/input/raw/class_FrameworkRawFileInputPointer.php index 8c4f6529..70e00c5d 100644 --- a/inc/classes/main/file_directories/input/raw/class_FrameworkRawFileInputPointer.php +++ b/inc/classes/main/file_directories/input/raw/class_FrameworkRawFileInputPointer.php @@ -93,19 +93,30 @@ class FrameworkRawFileInputPointer extends BaseFileIo implements InputPointer { return $this->read(1024); } + /** + * Reads a line, maximum 4096 Bytes from current file pointer + * + * @return $data Read data from file + * @throws UnsupportedOperationException If this method is called + */ + public function readLine () { + // Not supported in binary files ... + throw new UnsupportedOperationException(array($this, __FUNCTION__), self::EXCEPTION_UNSPPORTED_OPERATION); + } + /** * Reads given amount of bytes from file. 
* * @param $bytes Amount of bytes to read * @return $data Data read from file */ - public function read ($bytes) { + public function read ($bytes = NULL) { + // $bytes shall be integer + assert(is_int($bytes)); + // Try to read given characters $data = fread($this->getPointer(), $bytes); - // Was this successfull? - assert(is_string($data)); - // Then return it return $data; } diff --git a/inc/classes/main/file_directories/input/text/class_FrameworkTextFileInputPointer.php b/inc/classes/main/file_directories/input/text/class_FrameworkTextFileInputPointer.php index 6dc36c31..bb3b97ff 100644 --- a/inc/classes/main/file_directories/input/text/class_FrameworkTextFileInputPointer.php +++ b/inc/classes/main/file_directories/input/text/class_FrameworkTextFileInputPointer.php @@ -75,12 +75,34 @@ class FrameworkTextFileInputPointer extends BaseFileIo implements InputPointer { /** * Read data a file pointer * - * @return mixed The result of fread() + * @return $data Read data from file + */ + public function readFromFile () { + // Read 1024 Byte data from the file pointer and return it + return $this->read(1024); + } + + /** + * Reads a line, maximum 4096 Bytes from current file pointer + * + * @return $data Read data from file + */ + public function readLine () { + // Read whole line from the file pointer and return it + return $this->read(); + } + + /** + * Reads given amount of bytes from file. 
+ * + * @param $bytes Amount of bytes to read or whole line (only text files) + * @return $data Data read from file * @throws NullPointerException If the file pointer instance * is not set by setPointer() * @throws InvalidResourceException If there is being set */ - public function readFromFile () { + public function read ($bytes = NULL) { + // Some sanity checks if (is_null($this->getPointer())) { // Pointer not initialized throw new NullPointerException($this, self::EXCEPTION_IS_NULL_POINTER); @@ -89,22 +111,14 @@ class FrameworkTextFileInputPointer extends BaseFileIo implements InputPointer { throw new InvalidResourceException($this, self::EXCEPTION_INVALID_RESOURCE); } - // Read data from the file pointer and return it - return $this->read(1024); - } - - /** - * Reads given amount of bytes from file. - * - * @param $bytes Amount of bytes to read - * @return $data Data read from file - */ - public function read ($bytes) { - // Try to read given characters - $data = fgets($this->getPointer(), $bytes); - - // Was this successfull? - assert(is_string($data)); + // Is $bytes set? 
+ if (is_int($bytes)) { + // Try to read given characters + $data = fgets($this->getPointer(), $bytes); + } else { + // Try to read whole line + $data = fgets($this->getPointer()); + } // Then return it return $data; diff --git a/inc/classes/main/file_directories/io/class_FrameworkFileInputOutputPointer.php b/inc/classes/main/file_directories/io/class_FrameworkFileInputOutputPointer.php index 0a750029..ec6c0e9b 100644 --- a/inc/classes/main/file_directories/io/class_FrameworkFileInputOutputPointer.php +++ b/inc/classes/main/file_directories/io/class_FrameworkFileInputOutputPointer.php @@ -167,21 +167,34 @@ class FrameworkFileInputOutputPointer extends BaseFileIo implements InputOutputP return fseek($this->getPointer(), $seekPosition, $whence); } + /** + * Reads a line, maximum 4096 Bytes from current file pointer + * + * @return $data Read data from file + */ + public function readLine () { + // Read whole line + return $this->read(); + } + /** * Reads given amount of bytes from file. * * @param $bytes Amount of bytes to read * @return $data Data read from file */ - public function read ($bytes) { + public function read ($bytes = NULL) { // Validate the pointer $this->validateFilePointer(); - // Try to read given characters - $data = fread($this->getPointer(), $bytes); - - // Was this successfull? - assert(is_string($data)); + // Is $bytes set? 
+ if (is_int($bytes)) { + // Try to read given characters + $data = fread($this->getPointer(), $bytes); + } else { + // Try to read whole line + $data = fread($this->getPointer()); + } // Then return it return $data; diff --git a/inc/classes/main/file_directories/text/input/class_BaseInputTextFile.php b/inc/classes/main/file_directories/text/input/class_BaseInputTextFile.php index d5d2d0cb..cb1540ea 100644 --- a/inc/classes/main/file_directories/text/input/class_BaseInputTextFile.php +++ b/inc/classes/main/file_directories/text/input/class_BaseInputTextFile.php @@ -46,6 +46,14 @@ class BaseInputTextFile extends BaseTextFile { // ... and set it here $this->setPointerInstance($pointerInstance); } + + /** + * Reads a line from currently referenced file + * + * @return $data Read data from referenced file + */ + public function readLine () { + } } // [EOF] diff --git a/inc/classes/main/file_directories/text/input/csv/class_CsvInputFile.php b/inc/classes/main/file_directories/text/input/csv/class_CsvInputFile.php index b6c8d5e6..0c4c5a25 100644 --- a/inc/classes/main/file_directories/text/input/csv/class_CsvInputFile.php +++ b/inc/classes/main/file_directories/text/input/csv/class_CsvInputFile.php @@ -51,6 +51,121 @@ class CsvInputFile extends BaseInputTextFile implements CsvInputStreamer { // Return the prepared instance return $fileInstance; } + + /** + * Reads a line from CSV file and returns it as an indexed array. Please + * note that strings *must* be always in double-quotes, else any found + * column separators will be parsed or they may be interpreted incorrectly. 
+ * + * @param $columnSeparator Character to use separating columns + * @return $lineArray An indexed array with the read line + */ + public function readCsvFileLine ($columnSeparator) { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] columnSeparator=%s - CALLED!', __METHOD__, __LINE__, $columnSeparator)); + + // Read raw line + $data = $this->getPointerInstance()->readLine(); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] data()=%s', __METHOD__, __LINE__, strlen($data))); + + // Parse data + $lineArray = $this->parseDataToIndexedArray($data, $columnSeparator); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] lineArray()=%s - EXIT!', __METHOD__, __LINE__, count($lineArray))); + + // Return it + return $lineArray; + } + + /** + * Parses given data into an array + * + * @param $data Raw data e.g. returned from readLine() + * @param $columnSeparator Character to use separating columns + * @return $lineArray An indexed array with the read line + */ + private function parseDataToIndexedArray ($data, $columnSeparator) { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] data()=%s,columnSeparator=%s - CALLED!', __METHOD__, __LINE__, strlen($data), $columnSeparator)); + + // Init return array + $lineArray = array(); + + // Whether the parser reads a quoted string (which may contain the column separator again) + $isInQuotes = FALSE; + + // Init column data + $column = ''; + + // Now parse the line + for ($idx = 0; $idx < strlen($data); $idx++) { + // "Cache" char + $char = substr($data, $idx, 1); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] idx=%s,char=%s ...', __METHOD__, __LINE__, $idx, $char)); + + // Is the column separator found and not within quotes? 
+ if (($isInQuotes === FALSE) && ($char == $columnSeparator)) { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Adding column=%s ...', __METHOD__, __LINE__, $column)); + + // Add this line to the array + array_push($lineArray, $column); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] lineArray()=%s - After add!', __METHOD__, __LINE__, count($lineArray))); + + // Clear variable ... + $column = ''; + + // ... and skip it + continue; + } elseif ($char == chr(34)) { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] column=%s ...', __METHOD__, __LINE__, $column)); + + // $column must be empty at this point if we are at starting quote + assert(($isInQuotes === TRUE) || (empty($column))); + + // Double-quote found, so flip variable + $isInQuotes = (!$isInQuotes); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] isInQuotes=%d ...', __METHOD__, __LINE__, intval($isInQuotes))); + + // Skip double-quote (escaping of them is not yet supported) + continue; + } + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Adding char=%s ...', __METHOD__, __LINE__, $idx, $char)); + + // Add char to column + $column .= $char; + } // END - for + + // Is there something outstanding? + if (!empty($column)) { + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] Adding column=%s ...', __METHOD__, __LINE__, $column)); + + // Then don't forget this. 
:-) + array_push($lineArray, $column); + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] lineArray()=%s - After add!', __METHOD__, __LINE__, count($lineArray))); + } // END - if + + // Debug message + //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput(sprintf('[%s:%d:] lineArray()=%s - EXIT!', __METHOD__, __LINE__, count($lineArray))); + + // Return it + return $lineArray; + } } // [EOF] diff --git a/inc/classes/main/iterator/file/class_FileIterator.php b/inc/classes/main/iterator/file/class_FileIterator.php index fe4a57a7..b958b52b 100644 --- a/inc/classes/main/iterator/file/class_FileIterator.php +++ b/inc/classes/main/iterator/file/class_FileIterator.php @@ -131,7 +131,7 @@ class FileIterator extends BaseIterator implements SeekableWritableFileIterator * @param $bytes Amount of bytes to read * @return $data Data read from file */ - public function read ($bytes) { + public function read ($bytes = NULL) { // Call block instance return $this->getBlockInstance()->read($bytes); } -- 2.30.2