* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class BaseUrlSource extends BaseSource {
+ // Stack name for all URLs
+ const STACKER_NAME_URLS = 'urls';
+
// Array elements for CSV data array
const CRAWL_JOB_ARRAY_START_URL = 'start_url';
const CRAWL_JOB_ARRAY_DEPTH = 'start_depth';
*/
public function isUrlStackEmpty () {
// Determine it
- $isEmpty = $this->getStackInstance()->isStackEmpty('urls');
+ $isEmpty = $this->getStackInstance()->isStackEmpty(self::STACKER_NAME_URLS);
// Return result
return $isEmpty;
*
* @param $crawlData Array with partial data for being queued
* @return void
+ * @todo ~10% done
*/
protected function enrichCrawlerQueueData (array &$crawlData) {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+
+ // Sanity check: the start URL and start depth elements must both be set (isset() is also FALSE for NULL values). These are assert()s only, so they vanish when assertions are disabled.
+ assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL]));
+ assert(isset($crawlData[self::CRAWL_JOB_ARRAY_DEPTH]));
+
+ // @TODO Add more elements - so far nothing is written to $crawlData, although it is taken by reference for exactly that purpose
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
+
+ /**
+ * Enqueues the given crawler array in the assigned file-based stack by
+ * pushing it, unmodified, onto the stack named self::STACKER_NAME_URLS.
+ *
+ * @param $crawlData Array with partial data for being queued; it is
+ * taken by value and handed to pushNamed() as-is
+ * @return void
+ */
+ protected function enqueueInFileStack (array $crawlData) {
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+
+ // Get the stack instance and push the array onto the stack named by STACKER_NAME_URLS
+ $this->getStackInstance()->pushNamed(self::STACKER_NAME_URLS, $crawlData);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
}
}
*/
private function saveCsvDataInCrawlerQueue (array $csvData) {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData()=' . count($csvData) . ' - CALLED!');
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData()=' . count($csvData) . ' - CALLED!');
// The array has 3 elements, later enhancements may accept more
assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
self::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2]
);
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvArray()=' . count($csvArray) . ' - BEFORE!');
+
// Then add more data to it
- $this->enrichCrawlerQueueData($csvData);
+ $this->enrichCrawlerQueueData($csvArray);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvArray()=' . count($csvArray) . ' - AFTER!');
+
+ /*
+ * Then enqueue it in the file stack. The local crawler "task" will
+ * then pick this up.
+ */
+ $this->enqueueInFileStack($csvArray);
+
+ // Debug message
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
}
/**
*/
private function parseCsvEntry () {
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
// Pop it from stack
$csvData = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_ENTRY);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
// It must have 3 elements (see method parseCsvFile() for details)
assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
$this->saveCsvDataInCrawlerQueue($csvData);
// Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
}
/**
-Subproject commit 23d0a889351670874a4b120e1487edf89dc1b540
+Subproject commit aebdd613aed13516c96d08fa6f72b3a11f4d3b85