// CFG: CRAWLER-URL-FOUND-RSS-FILE-STACK-INDEX-CLASS
$cfg->setConfigEntry('crawler_url_found_rss_file_stack_index_class', 'FileStackIndex');
+// CFG: CRAWLER-UPLOADED-LIST-URL-SOURCE-STACK-CLASS
+$cfg->setConfigEntry('crawler_uploaded_list_url_source_stack_class', 'FiFoStacker');
+
+// CFG: STACKER-CSV-FILE-MAX-SIZE
+$cfg->setConfigEntry('stacker_csv_file_max_size', 10);
+
// CFG: TASK-CRAWLER-NODE-COMMUNICATOR-STARTUP-DELAY
$cfg->setConfigEntry('task_crawler_node_communicator_startup_delay', 500);
// CFG: TASK-CRAWLER-URL-SOURCE-FOUND-RSS-MAX-RUNS
$cfg->setConfigEntry('task_crawler_uploaded_list_scanner_max_runs', 0);
+// CFG: CSV-FILE-PATH
+$cfg->setConfigEntry('csv_file_path', 'data/url_lists');
+
///////////////////////////////////////////////////////////////////////////////
// HTTP Configuration
///////////////////////////////////////////////////////////////////////////////
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
+ /**
+ * "Cached" CSV path
+ */
+ private $csvFilePath = '';
+
+ /**
+ * Last CSV file instance
+ */
+ private $lastCsvFileInstance = NULL;
+
+ /**
+ * Stack for pushing data from this class to another
+ */
+ private $stackSourceInstance = NULL;
+
+ /**
+ * Stack name for a CSV file
+ */
+ const STACK_NAME_CSV_FILE = 'csv_file';
+
+ /**
+ * "Imported" CSV files
+ */
+ private $csvFileImported = array();
+
/**
* Protected constructor
*
protected function __construct () {
// Call parent constructor
parent::__construct(__CLASS__);
+
+ // "Cache" CSV path (configured 'base_path' and 'csv_file_path', joined by '/') for faster usage
+ $this->csvFilePath = $this->getConfigInstance()->getConfigEntry('base_path') . '/' . $this->getConfigInstance()->getConfigEntry('csv_file_path');
+
+ // Initialize directory instance (class comes from config entry 'directory_class') on the cached CSV path
+ $directoryInstance = ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilePath));
+
+ // Set it here, the CSV file lookup reads directory entries through it
+ $this->setDirectoryInstance($directoryInstance);
+
+ // Init stack instance (class comes from config entry 'crawler_uploaded_list_url_source_stack_class')
+ $this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_url_source_stack_class');
+
+ // Init the named stack onto which found CSV files are pushed
+ $this->getStackSourceInstance()->initStack(self::STACK_NAME_CSV_FILE);
}
/**
// Init source
$sourceInstance->initSource('crawler', 'uploaded_list');
- // Get a
// Return the prepared instance
return $sourceInstance;
}
+ /**
+  * Looks for the next CSV file in the CSV directory that has not been
+  * marked as "imported" yet.
+  *
+  * When such a file is found, an instance created from the configured
+  * 'csv_file_class' is stored in lastCsvFileInstance.
+  *
+  * @return $isFound Whether a CSV file is found
+  */
+ private function isCsvFileFound () {
+     // Local handles on the directory and its iterator
+     $directoryInstance = $this->getDirectoryInstance();
+     $iteratorInstance = $directoryInstance->getDirectoryIteratorInstance();
+
+     // An iterator that is no longer valid has to start over
+     if (!$iteratorInstance->valid()) {
+         $iteratorInstance->rewind();
+     }
+
+     // Names to leave out: fixed entries plus every already imported file
+     $excludedEntries = array_merge(array('.htaccess', '.', '..'), $this->csvFileImported);
+
+     // Read next entry that is not excluded
+     $entryName = $directoryInstance->readDirectoryExcept($excludedEntries);
+
+     // Only a non-empty entry whose last four characters are '.csv' counts
+     $isFound = ((!empty($entryName)) && (substr($entryName, -4, 4) == '.csv'));
+
+     if ($isFound) {
+         // Keep the CSV file instance for the following import step
+         $this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilePath . '/' . $entryName));
+     }
+
+     // Report the outcome
+     return $isFound;
+ }
+
+ /**
+  * Starts the import of the CSV file found last: the file instance is
+  * pushed onto the CSV file stack (to be processed by other task) and its
+  * base name is remembered as "imported".
+  *
+  * @return void
+  * @throws NullPointerException If lastCsvFileInstance is not set
+  */
+ private function importCsvFile () {
+     //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+
+     // Without a previously found file there is nothing to import
+     if ($this->lastCsvFileInstance === NULL) {
+         // This should not happen
+         throw new NullPointerException($this, self::EXCEPTION_IS_NULL_POINTER);
+     }
+
+     // Local handle on the file that is about to be stacked
+     $csvFileInstance = $this->lastCsvFileInstance;
+
+     // Push it onto the named CSV file stack
+     $this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_FILE, $csvFileInstance);
+
+     // Remember the bare file name; the directory lookup excludes these names
+     $this->csvFileImported[] = basename($csvFileInstance->getFileName());
+
+     // Release the reference held by this object (to save some RAM)
+     $this->lastCsvFileInstance = NULL;
+
+     //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
+
+ /**
+  * Getter for stackSourceInstance variable
+  *
+  * This is the accessor name used by the constructor and the CSV import
+  * of this class.
+  *
+  * @return $stackSourceInstance An instance of an additional stack
+  */
+ public final function getStackSourceInstance () {
+     return $this->stackSourceInstance;
+ }
+
+ /**
+  * Alias of getStackSourceInstance(), kept so that code calling the getter
+  * under its former name keeps working.
+  *
+  * @return $stackSourceInstance An instance of an additional stack
+  * @deprecated Use getStackSourceInstance() instead
+  */
+ public final function stackSourceInstance () {
+     return $this->getStackSourceInstance();
+ }
+
/**
* Processes entries in the stack.
*
public function processStack () {
// Does the stack have some entries left?
if (!$this->isUrlStackEmpty()) {
- // Nothing to handle here
+ // Handle next entry
$this->processNextEntry();
- } elseif ($this->
+ } elseif ($this->isCsvFileFound()) {
+ // A CSV file has been found and can maybe be imported.
+ $this->importCsvFile();
+ }
$this->partialStub('Please implement this method.');
}