* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class BaseUrlSource extends BaseSource {
+ // Name of the stack holding all URLs (checked for emptiness and pushed to by this class)
+ const STACKER_NAME_URLS = 'urls';
+
+ // Array elements (keys) for CSV data array, note: CRAWL_JOB_ARRAY_DEPTH maps to key 'start_depth'
+ const CRAWL_JOB_ARRAY_START_URL = 'start_url';
+ const CRAWL_JOB_ARRAY_DEPTH = 'start_depth';
+ const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth';
+
/**
* Protected constructor
*
*
* @return $isEmpty Whether the stack 'urls' is empty.
*/
- protected function isUrlStackEmpty () {
+ public function isUrlStackEmpty () {
// Ask the stack instance whether the stack named 'urls' is empty
- $isEmpty = $this->getStackInstance()->isStackEmpty('urls');
+ $isEmpty = $this->getStackInstance()->isStackEmpty(self::STACKER_NAME_URLS);
// Return the result of isStackEmpty() unchanged
return $isEmpty;
}
+
+ /**
+ * Checks the given associative array (taken by reference) for the minimum
+ * of 2 required elements, enriching it with more data is not done yet:
+ *
+ * 'start_url' - Starting URL
+ * 'start_depth' - Crawl depth for starting URL
+ *
+ * @param $crawlData Array with partial data for being queued, must contain both elements above
+ * @return void
+ * @todo ~10% done, only assert()-based checks so far (assertions can be disabled by configuration)
+ */
+ protected function enrichCrawlerQueueData (array &$crawlData) {
+ // Debug message (disabled, remove the leading slash to enable it)
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+
+ // Both mandatory elements must be set, isset() also rejects NULL values
+ assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL]));
+ assert(isset($crawlData[self::CRAWL_JOB_ARRAY_DEPTH]));
+
+ // @TODO Add more elements, nothing is written to $crawlData yet
+
+ // Debug message (disabled, remove the leading slash to enable it)
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
+
+ /**
+ * Enqueues given crawler array in the assigned stack (presumably file-based,
+ * as the method name suggests) by pushing it to the stack named 'urls'.
+ *
+ * @param $crawlData Array with crawl data for being queued
+ * @return void
+ */
+ protected function enqueueInFileStack (array $crawlData) {
+ // Debug message (disabled, remove the leading slash to enable it)
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+
+ // Push the array to the same named stack that isUrlStackEmpty() checks
+ $this->getStackInstance()->pushNamed(self::STACKER_NAME_URLS, $crawlData);
+
+ // Debug message (disabled, remove the leading slash to enable it)
+ //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
}
// [EOF]