*/
protected function initSource ($prefix, $sourceName) {
// Trace message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: prefix=' . $prefix . ',sourceName=' . $sourceName . ' - CALLED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: prefix=' . $prefix . ',sourceName=' . $sourceName . ' - CALLED!');
// Use another object factory
$stackInstance = FileStackFactory::createFileStackInstance($prefix . '_url', $sourceName);
$this->setStackInstance($stackInstance);
// Trace message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: EXIT!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: EXIT!');
}
/**
*/
protected function enrichCrawlerQueueData (array &$crawlData) {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: crawlData()=' . count($crawlData) . ' - CALLED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: crawlData()=' . count($crawlData) . ' - CALLED!');
// Check for minimum array elements
assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL]));
// @TODO Add more elements
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: EXIT!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: EXIT!');
}
/**
*/
protected function enqueueInFileStack (array $crawlData) {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: crawlData()=' . count($crawlData) . ' - CALLED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: crawlData()=' . count($crawlData) . ' - CALLED!');
// Get the stack instance and enqueue it
$this->getStackInstance()->pushNamed(self::STACKER_NAME_URLS, $crawlData);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: EXIT!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: EXIT!');
}
}
*/
protected function __construct () {
// Call parent constructor
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: CONSTRUCTED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: CONSTRUCTED!');
parent::__construct(__CLASS__);
// "Cache" CSV path for faster usage
$this->csvFilePath = $this->getConfigInstance()->getConfigEntry('root_base_path') . $this->getConfigInstance()->getConfigEntry('crawler_csv_file_path');
// Initialize directory instance
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-SOURCE: this->csvFilePath=%s', $this->csvFilePath));
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: this->csvFilePath=%s', $this->csvFilePath));
$directoryInstance = ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilePath));
// Set it here
self::STACK_NAME_CSV_ENTRY,
] as $stackName) {
// Init single stack
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-SOURCE: stackName=%s', $stackName));
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: stackName=%s', $stackName));
$this->getStackSourceInstance()->initStack($stackName);
}
$this->columnSeparator = $this->getConfigInstance()->getConfigEntry('crawler_url_list_column_separator');
// Trace message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: Finished contructing object.');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: Finished contructing object.');
}
/**
*/
private function isCsvFileFound () {
// Is it valid?
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: CALLED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: CALLED!');
if (!$this->getDirectoryInstance()->getDirectoryIteratorInstance()->valid()) {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: Rewinding iterator ...');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: Rewinding iterator ...');
// Rewind to start
$this->getDirectoryInstance()->getDirectoryIteratorInstance()->rewind();
} // END - if
// Read next entry
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: this->csvFileImported=' . print_r($this->csvFileImported, TRUE));
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: this->csvFileImported=' . print_r($this->csvFileImported, TRUE));
$directoryEntry = $this->getDirectoryInstance()->readDirectoryExcept(array_merge(array('.htaccess', '.', '..'), $this->csvFileImported));
// Is it empty or wrong file extension?
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry);
if ((empty($directoryEntry)) || (substr($directoryEntry, -4, 4) != '.csv')) {
// Skip further processing
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - SKIPPED!');
return FALSE;
} // END - if
$this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_input_file_class', array($infoInstance));
// Found an entry
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - Instance created - EXIT!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - Instance created - EXIT!');
return TRUE;
}
*/
private function saveCsvDataInCrawlerQueue (array $csvData) {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: csvData()=' . count($csvData) . ' - CALLED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData()=' . count($csvData) . ' - CALLED!');
// The array must have a fixed amount of elements, later enhancements may accept more
assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
self::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2]
);
- // Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: csvArray()=' . count($csvArray) . ' - BEFORE!');
-
// Then add more data to it
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=' . count($csvArray) . ' - BEFORE!');
$this->enrichCrawlerQueueData($csvArray);
- // Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: csvArray()=' . count($csvArray) . ' - AFTER!');
-
/*
* Then enqueue it in the file stack. The local crawler "task" will
* then pick this up.
*/
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=' . count($csvArray) . ' - AFTER!');
$this->enqueueInFileStack($csvArray);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: EXIT!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: EXIT!');
}
/**
*/
private function addCsvFile () {
// Trace message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: CALLED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: CALLED!');
// Is the instance set?
if (is_null($this->lastCsvFileInstance)) {
}
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: this->lastCsvFileInstance=' . $this->lastCsvFileInstance);
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: this->lastCsvFileInstance=' . $this->lastCsvFileInstance);
// Stack this file
$this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_FILE, $this->lastCsvFileInstance);
$this->lastCsvFileInstance = NULL;
// Trace message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: EXIT!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: EXIT!');
}
/**
*/
private function parseCsvFile () {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: CALLED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: CALLED!');
// Get next entry
$csvFileInstance = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_FILE);
$csvData = $csvFileInstance->readCsvFileLine($this->columnSeparator);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
// Expect always an array
assert(is_array($csvData));
// Is the array empty?
if (count($csvData) == 0) {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: File ' . $csvFileInstance->getFilename() . ' has been fully read.');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: File ' . $csvFileInstance->getFilename() . ' has been fully read.');
// Try to close it by actually unsetting (destructing) it
unset($csvFileInstance);
$this->getStackSourceInstance()->pushNamed(self::STACK_NAME_CSV_ENTRY, $csvData);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: EXIT!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: EXIT!');
}
/**
*/
private function parseCsvEntry () {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: CALLED!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: CALLED!');
// Pop it from stack
$csvData = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_ENTRY);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
// It must have a fixed amount of elements (see method parseCsvFile() for details)
assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
$this->saveCsvDataInCrawlerQueue($csvData);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-SOURCE: EXIT!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: EXIT!');
}
/**