}
/**
- * Creates an instance of this class
- *
- * @return $sourceInstance An instance of a Source class
- */
- public final static function createCrawlerUploadedListUrlSource () {
- // Get new instance
- $sourceInstance = new CrawlerUploadedListUrlSource();
-
- // Init source
- $sourceInstance->initSource('crawler', 'uploaded_list');
-
- // Return the prepared instance
- return $sourceInstance;
- }
-
- /**
- * Checks whether a CSV file is found
+ * Checks whether a CSV file is found in the configured path
*
* @return $isFound Whether a CSV file is found
*/
private function isCsvFileFound () {
//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
- // Is the instance valid?
+ // Does the directory iterator report a valid position?
if (!$this->getDirectoryInstance()->getDirectoryIteratorInstance()->valid()) {
- // Then rewind it
+ // No, so rewind the iterator before the next entry is read below
$this->getDirectoryInstance()->getDirectoryIteratorInstance()->rewind();
} // END - if
/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: this->csvFileImported=' . print_r($this->csvFileImported, TRUE));
$directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array_merge(array('.htaccess', '.', '..'), $this->csvFileImported));
- // The read entry has not to be empty and extension must be '.csv'
+ // Debug message: length and name of the directory entry that was just read
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
+
+ // Is the entry empty or do its last four characters differ from '.csv'?
if ((empty($directoryEntry)) || (substr($directoryEntry, -4, 4) != '.csv')) {
// Skip further processing
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
return FALSE;
} // END - if
return TRUE;
}
+ /**
+ * Creates an instance of this class and initializes it by calling
+ * initSource('crawler', 'uploaded_list') on it.
+ *
+ * @return $sourceInstance The created and initialized CrawlerUploadedListUrlSource instance
+ */
+ public final static function createCrawlerUploadedListUrlSource () {
+ // Create the new source instance
+ $sourceInstance = new CrawlerUploadedListUrlSource();
+
+ // Init source with 'crawler'/'uploaded_list' (presumably a prefix and the source name - TODO confirm against initSource())
+ $sourceInstance->initSource('crawler', 'uploaded_list');
+
+ // Hand the initialized instance back to the caller
+ return $sourceInstance;
+ }
+
/**
* Initializes the import of the CSV file which is being processed by another task
*
public function processStack () {
// Does the stack have some entries left?
if (!$this->isUrlStackEmpty()) {
- // Handle next entry
+ /*
+ * Handle next entry. This method will be called very often, so need
+ * to process more than one entry at a time.
+ */
$this->processNextEntry();
} elseif ($this->isCsvFileFound()) {
- // A CSV file has been found and can maybe be imported.
+ /*
+ * A file containing a URL list is found. Please note the format is
+ * CSV-like as you may wish to provide meta data such as crawl
+ * depth, handling of 3rd-party URLs and such.
+ */
$this->importCsvFile();
}