* @version		0.0.0
 * @copyright	Copyright (c) 2014 Crawler Developer Team
 * @license		GNU GPL 3.0 or any newer version
 * @link		http://www.ship-simu.org
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see http://www.gnu.org/licenses/.
 */
class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
	/**
	 * Name of the stack that receives CSV file instances
	 */
	const STACK_NAME_CSV_FILE = 'csv_file';

	/**
	 * Path of the CSV directory, assembled once in the constructor
	 */
	private $csvFilePath = '';

	/**
	 * CSV file instance created by the latest successful lookup, NULL if
	 * none is pending
	 */
	private $lastCsvFileInstance = NULL;

	/**
	 * Additional stack used for handing data from this class to another
	 */
	private $stackSourceInstance = NULL;

	/**
	 * Base names of all CSV files that have already been stacked
	 * ("imported") by this instance
	 */
	private $csvFileImported = array();

	/**
	 * Protected constructor, use createCrawlerUploadedListUrlSource() to
	 * obtain an instance.
	 *
	 * @return	void
	 */
	protected function __construct () {
		// The parent class needs to know the real class name
		parent::__construct(__CLASS__);

		// Assemble the CSV path only once from both configuration entries
		$configInstance = $this->getConfigInstance();
		$this->csvFilePath = $configInstance->getConfigEntry('base_path') . '/' . $configInstance->getConfigEntry('crawler_csv_file_path');

		// Create a directory instance for that path and remember it
		$this->setDirectoryInstance(ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilePath)));

		// Create the additional stack and prepare the stack for CSV files on it
		$this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_url_source_stack_class');
		$this->stackSourceInstance->initStack(self::STACK_NAME_CSV_FILE);
	}

	/**
	 * Creates an instance of this class
	 *
	 * @return	$sourceInstance		An instance of a Source class
	 */
	public final static function createCrawlerUploadedListUrlSource () {
		// Create the instance through the protected constructor
		$sourceInstance = new self();

		// Initialize it as the crawler's "uploaded list" source
		$sourceInstance->initSource('crawler', 'uploaded_list');

		// Hand the prepared instance back
		return $sourceInstance;
	}

	/**
	 * Getter for stackSourceInstance variable
	 *
	 * @return	$stackSourceInstance	An instance of an additional stack
	 */
	public final function getStackSourceInstance () {
		return $this->stackSourceInstance;
	}

	/**
	 * Asks the directory instance for one entry and checks whether it names
	 * a CSV file. On success a CSV file instance for that entry is kept in
	 * lastCsvFileInstance.
	 *
	 * @return	$isFound	Whether a CSV file is found
	 */
	private function isCsvFileFound () {
		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');

		// All directory access below goes through this instance
		$directoryInstance = $this->getDirectoryInstance();

		// An iterator that is no longer valid is started over
		if (!$directoryInstance->getDirectoryIteratorInstance()->valid()) {
			$directoryInstance->getDirectoryIteratorInstance()->rewind();
		} // END - if

		// Entries never to be returned: fixed names plus everything already imported
		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: this->csvFileImported=' . print_r($this->csvFileImported, TRUE));
		$excludedNames = array_merge(array('.htaccess', '.', '..'), $this->csvFileImported);

		// Read one entry that is not excluded
		$entryName = $directoryInstance->readDirectoryExcept($excludedNames);
		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($entryName) . ')=' . $entryName);

		// Only a non-empty entry ending with ".csv" is accepted
		$isCsvEntry = ((!empty($entryName)) && (substr($entryName, -4) == '.csv'));

		if (!$isCsvEntry) {
			// Nothing usable this time
			/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($entryName) . ')=' . $entryName . ' - SKIPPED!');
			return FALSE;
		} // END - if

		// Keep a CSV file instance for the found entry, importCsvFile() picks it up
		$this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilePath . '/' . $entryName));

		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
		return TRUE;
	}

	/**
	 * Starts the import of the pending CSV file: its instance is pushed on
	 * the CSV file stack, its base name is remembered as "imported" and the
	 * pending instance is cleared.
	 *
	 * @return	void
	 * @throws	NullPointerException	If lastCsvFileInstance is not set
	 */
	private function importCsvFile () {
		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');

		// Without a pending CSV file instance there is nothing to import
		if ($this->lastCsvFileInstance === NULL) {
			// This should not happen
			throw new NullPointerException($this, self::EXCEPTION_IS_NULL_POINTER);
		} // END - if

		// Work on a local handle, the property is cleared at the end
		$csvFileInstance = $this->lastCsvFileInstance;

		// Put the file on the stack ...
		$this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_FILE, $csvFileInstance);

		// ... remember its base name so the directory lookup excludes it from now on
		$this->csvFileImported[] = basename($csvFileInstance->getFileName());

		// ... and drop the reference held by this class (to save some RAM)
		$this->lastCsvFileInstance = NULL;

		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
	}

	/**
	 * Processes entries in the stack. Each call does at most one thing:
	 * either a single entry from the URL stack is handled or, if that stack
	 * is empty, a newly found CSV file is queued for import.
	 *
	 * @return	void
	 * @todo	~10% done
	 */
	public function processStack () {
		if ($this->isUrlStackEmpty()) {
			/*
			 * No URL is waiting, so look for a file containing an URL list.
			 * Please note the format is CSV-like as you may wish to provide
			 * meta data such as crawl depth, handling of 3rd-party URLs and
			 * such.
			 */
			if ($this->isCsvFileFound()) {
				$this->importCsvFile();
			} // END - if
		} else {
			/*
			 * Handle the next entry, only one per call as this method is
			 * called very often.
			 */
			$this->processNextEntry();
		}

		// This method is unfinished and always reports that
		$this->partialStub('Please implement this method.');
	}
}

// [EOF]
?>