} // END - if
// Read next entry
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: this->csvFileImported=' . print_r($this->csvFileImported, TRUE));
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: this->csvFileImported=' . print_r($this->csvFileImported, TRUE));
$directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array_merge(array('.htaccess', '.', '..'), $this->csvFileImported));
// The read entry has not to be empty and extension must be '.csv'
} // END - if
// Initialize CSV file instance
- $this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilePath . '/' . $directoryEntry));
+ $this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_input_file_class', array($this->csvFilePath . '/' . $directoryEntry));
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - Instance created - EXIT!');
// Found an entry
- //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
return TRUE;
}
+ /**
+ * Checks whether a CSV file has been loaded (added to the stack).
+ *
+ * The result is TRUE only if the stack named by self::STACK_NAME_CSV_FILE
+ * has been initialized and is not empty.
+ *
+ * @return bool Whether a CSV file has been loaded
+ */
+ private function isCsvFileAdded () {
+ // Check whether the CSV file stack is initialized and not empty
+ $isLoaded = (($this->getStackSourceInstance()->isStackInitialized(self::STACK_NAME_CSV_FILE)) && (!$this->getStackSourceInstance()->isStackEmpty(self::STACK_NAME_CSV_FILE)));
+
+ // Return the result
+ return $isLoaded;
+ }
+
/**
* Initializes the import of the CSV file which is being processed by other task
*
* @return void
* @throws NullPointerException If lastCsvFileInstance is not set
*/
- private function importCsvFile () {
+ private function addCsvFile () {
//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
// Is the instance set?
//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
}
+ /**
+ * Parses the next stacked CSV file by reading only one line from it. The
+ * read line is then validated and, if found good, fed to the next stack.
+ * The file is removed from the stack only once it has been fully parsed.
+ *
+ * NOTE(review): the body currently only emits the CALLED!/EXIT! debug
+ * messages; the parsing described above is not implemented here yet.
+ *
+ * @return void
+ */
+ private function parseCsvEntry () {
+ // Debug message: method entry
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
+ // Debug message: method exit
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
+
+
/**
* Getter for stackSourceInstance variable
*
* Processes entries in the stack.
*
* @return void
- * @todo ~10% done
+ * @todo ~20% done
*/
public function processStack () {
// Does the stack have some entries left?
- if (!$this->isUrlStackEmpty()) {
- // Handle next entry
- $this->processNextEntry();
+ if ($this->isCsvFileAdded()) {
+ /*
+ * A CSV file has been found and "imported" (added to stack). Now
+ * the file can be read line by line and each line can be checked.
+ */
+ $this->parseCsvEntry();
} elseif ($this->isCsvFileFound()) {
- // A CSV file has been found and can maybe be imported.
- $this->importCsvFile();
+ /*
+ * A file containing a URL list has been found. Please note the format is
+ * CSV-like as you may wish to provide meta data such as crawl
+ * depth, handling of 3rd-party URLs and such.
+ */
+ $this->addCsvFile();
+ } elseif (!$this->isUrlStackEmpty()) {
+ /*
+ * Handle next entry. This method will be called very often, so there is
+ * no need to process more than one entry at a time.
+ */
+ $this->processNextEntry();
}
$this->partialStub('Please implement this method.');