* @version 0.0.0
* @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2015 Hub Developer Team
* @license GNU GPL 3.0 or any newer version
* @link http://www.shipsimu.org
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class BaseUrlSource extends BaseSource
{
    // Name of the stack on which all URLs are kept
    const STACKER_NAME_URLS = 'urls';

    // Keys of the crawl job array (CSV data)
    const CRAWL_JOB_ARRAY_START_URL      = 'start_url';
    const CRAWL_JOB_ARRAY_DEPTH          = 'start_depth';
    const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth';

    /**
     * Protected constructor; does nothing besides handing the class name
     * over to the parent constructor.
     *
     * @param $className Name of the class
     * @return void
     */
    protected function __construct($className)
    {
        parent::__construct($className);
    }

    /**
     * Initializes this source: a file stack instance is requested from
     * FileStackFactory (stack prefix is the given prefix with '_url'
     * appended) and registered on this object via setStackInstance().
     *
     * @param $prefix Prefix for this source
     * @param $sourceName Name of this source
     * @return void
     */
    protected function initSource($prefix, $sourceName)
    {
        // Create the file stack through the factory and assign it in one go
        $this->setStackInstance(
            FileStackFactory::createFileStackInstance($prefix . '_url', $sourceName)
        );
    }

    /**
     * Determines whether the stack 'urls' is empty.
     *
     * @return Result of the stack instance's isStackEmpty() call for the
     *         'urls' stack (presumably a boolean - confirm against the
     *         stack implementation)
     */
    public function isUrlStackEmpty()
    {
        // Ask the assigned stack instance directly
        return $this->getStackInstance()->isStackEmpty(self::STACKER_NAME_URLS);
    }

    /**
     * Enriches the given associative array with more data. At least these
     * 2 elements are required:
     *
     * 'start_url'   - Starting URL
     * 'start_depth' - Crawl depth for starting URL
     *
     * Right now the method only asserts that both elements are set; no
     * further elements are added yet (see the todo below).
     *
     * @param $crawlData Array with partial data for being queued (taken by reference)
     * @return void
     * @todo ~10% done
     */
    protected function enrichCrawlerQueueData(array &$crawlData)
    {
        // Entry trace (disabled)
        //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');

        // Both mandatory elements must be present
        assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL]));
        assert(isset($crawlData[self::CRAWL_JOB_ARRAY_DEPTH]));

        // @TODO Add more elements

        // Exit trace (disabled)
        //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
    }

    /**
     * Enqueues given crawler array in assigned file-based stack by pushing
     * it onto the 'urls' stack.
     *
     * @param $crawlData Array with partial data for being queued
     * @return void
     */
    protected function enqueueInFileStack(array $crawlData)
    {
        // Entry trace (disabled)
        //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');

        // Fetch the assigned stack, then push the crawl data onto the 'urls' stack
        $urlStack = $this->getStackInstance();
        $urlStack->pushNamed(self::STACKER_NAME_URLS, $crawlData);

        // Exit trace (disabled)
        //* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
    }
}

// [EOF]
?>