* @version 0.0.0
 * @copyright Copyright (c) 2014 Crawler Developer Team
 * @license GNU GPL 3.0 or any newer version
 * @link http://www.ship-simu.org
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
	/**
	 * Path of the directory containing CSV files. It is assembled once in
	 * the constructor from the 'base_path' and 'crawler_csv_file_path'
	 * configuration entries.
	 */
	private $csvFilesPath = '';

	/**
	 * Last found CSV file (not read or written by any method of this class
	 * yet)
	 */
	private $lastCsvFile = '';

	/**
	 * Protected constructor, use createCrawlerUploadedListUrlSource() to
	 * obtain an instance.
	 *
	 * Builds the CSV files path from configuration and registers a
	 * directory instance (configured by 'directory_class') for it.
	 *
	 * @return	void
	 */
	protected function __construct () {
		// Let the parent class do its part first
		parent::__construct(__CLASS__);

		// Assemble path of CSV files from both configuration entries
		$configInstance = $this->getConfigInstance();
		$this->csvFilesPath = $configInstance->getConfigEntry('base_path') . '/' . $configInstance->getConfigEntry('crawler_csv_file_path');

		// Create a directory instance on that path and remember it
		$this->setDirectoryInstance(ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilesPath)));
	}

	/**
	 * Reads one entry from the configured CSV directory and checks whether
	 * it is a file ending with '.csv'. On success a CSV file instance
	 * (configured by 'csv_file_class') for that entry is set in this
	 * object.
	 *
	 * @return	$isFound	Whether a CSV file is found
	 */
	private function isCsvFileFound () {
		// Start over when the iterator's current position is not valid
		$iteratorInstance = $this->getDirectoryInstance()->getDirectoryIteratorInstance();
		if (!$iteratorInstance->valid()) {
			$iteratorInstance->rewind();
		} // END - if

		// Fetch next entry, '.htaccess' and both dot entries are excluded
		$entryName = $this->getDirectoryInstance()->readDirectoryExcept(array('.htaccess', '.', '..'));

		// Debug message
		/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($entryName) . ')=' . $entryName);

		// Only a non-empty entry whose last four characters are '.csv' counts
		$hasCsvExtension = ((!empty($entryName)) && (substr($entryName, -4, 4) == '.csv'));

		// Nothing usable found?
		if (!$hasCsvExtension) {
			// Then skip this entry
			/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($entryName) . ')=' . $entryName . ' - SKIPPED!');
			return FALSE;
		} // END - if

		// Create a CSV file instance on the full path and set it here
		$this->setCsvFileInstance(ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilesPath . '/' . $entryName)));

		// A CSV file has been found
		return TRUE;
	}

	/**
	 * Factory method: creates an instance of this class and initializes it
	 * as source 'uploaded_list' of 'crawler'.
	 *
	 * @return	$sourceInstance		An instance of a Source class
	 */
	public final static function createCrawlerUploadedListUrlSource () {
		// Create the bare instance
		$urlSourceInstance = new self();

		// Initialize the source
		$urlSourceInstance->initSource('crawler', 'uploaded_list');

		// Get a ??? @TODO

		// Hand over the prepared instance
		return $urlSourceInstance;
	}

	/**
	 * Processes entries in the stack. When the URL stack is empty, a CSV
	 * file is looked up and imported instead. The partial-stub notice is
	 * always emitted at the end as this method is not finished.
	 *
	 * @return	void
	 * @todo	~10% done
	 */
	public function processStack () {
		// Is the stack empty?
		if ($this->isUrlStackEmpty()) {
			// Then look for an uploaded list
			if ($this->isCsvFileFound()) {
				/*
				 * A file containing an URL list is found. Please note the
				 * format is CSV-like as you may wish to provide meta data
				 * such as crawl depth, handling of 3rd-party URLs and such.
				 */
				$this->importCsvFile();
			} // END - if
		} else {
			/*
			 * The stack still has entries, so handle the next one. This
			 * method will be called very often, so need to process more
			 * than one entry at a time.
			 */
			$this->processNextEntry();
		}

		// This method is still unfinished
		$this->partialStub('Please implement this method.');
	}
}

// [EOF]
?>