*/
const STACK_NAME_CSV_ENTRY = 'csv_entry';
+ /**
+ * Number of elements in a crawl (CSV) entry, which is an indexed array:
+ *
+ * 0 = URL to crawl
+ * 1 = Crawl depth of URL
+ * 2 = Crawl depth of linked URLs (same other host only)
+ */
+ const CRAWL_ENTRY_SIZE = 3;
+
/**
* "Imported" CSV files
*/
return $sourceInstance;
}
+ /**
+ * Meant to enrich the given CSV entry (array) with further information
+ * (e.g. which files shall be crawled) and save it in the file-based stack.
+ * As visible here it is still a stub that only emits two debug messages.
+ *
+ * @param $csvData Array with data from a CSV file
+ * @return void
+ */
+ private function saveCsvDataInCrawlerQueue (array $csvData) {
+ // Trace method entry; only the number of elements is logged, not their values
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData()=' . count($csvData) . ' - CALLED!');
+ // @TODO Nothing is enriched or stored yet, $csvData is only counted above
+ // Trace method exit
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
+
/**
* Checks whether a CSV file has been loaded (added to the stack)
*
} // END - if
// ... with 3 elements, later enhancements may accept more
- assert(count($csvData) == 3);
+ assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
/*
* Push the file back on stack as it may contain more entries. This way
// Debug message
/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: CALLED!');
+ // Pop it from stack
+ $csvData = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_ENTRY);
+
+ // Debug message
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
+
+ // It must have CRAWL_ENTRY_SIZE elements (see method parseCsvFile() for details)
+ assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
+
+ // Save it in crawler queue (which will enrich it with much more information)
+ $this->saveCsvDataInCrawlerQueue($csvData);
+
// Debug message
/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
}