+++ /dev/null
-<?php
-/**
- * A general URL source class
- *
- * @author Roland Haeder <webmaster@shipsimu.org>
- * @version 0.0.0
- * @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2015 Hub Developer Team
- * @license GNU GPL 3.0 or any newer version
- * @link http://www.shipsimu.org
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-class BaseUrlSource extends BaseSource {
-	// Name of the stack that holds all URLs
-	const STACKER_NAME_URLS = 'urls';
-
-	// Keys used in the crawl job array (CSV data)
-	const CRAWL_JOB_ARRAY_START_URL      = 'start_url';
-	const CRAWL_JOB_ARRAY_DEPTH          = 'start_depth';
-	const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth';
-
-	/**
-	 * Protected constructor, it only forwards the class name to the parent
-	 * constructor.
-	 *
-	 * @param	$className	Name of the class
-	 * @return	void
-	 */
-	protected function __construct ($className) {
-		// Delegate to parent class
-		parent::__construct($className);
-	}
-
-	/**
-	 * Initializes this source by creating a file stack through
-	 * FileStackFactory and registering it as this source's stack instance.
-	 * The first factory argument is the given prefix with '_url' appended.
-	 *
-	 * @param	$prefix			Prefix for this source
-	 * @param	$sourceName		Name of this source
-	 * @return	void
-	 */
-	protected function initSource ($prefix, $sourceName) {
-		// Build the first factory argument from the prefix
-		$stackPrefix = $prefix . '_url';
-
-		// Let the factory create the file stack ...
-		$fileStack = FileStackFactory::createFileStackInstance($stackPrefix, $sourceName);
-
-		// ... and remember it in this source
-		$this->setStackInstance($fileStack);
-	}
-
-	/**
-	 * Asks the assigned stack instance whether the stack 'urls' is empty.
-	 *
-	 * @return	$isEmpty	Whether the stack 'urls' is empty.
-	 */
-	public function isUrlStackEmpty () {
-		// Hand the stack's answer straight back to the caller
-		return $this->getStackInstance()->isStackEmpty(self::STACKER_NAME_URLS);
-	}
-
-	/**
-	 * Enriches the given associative array with more data. At the moment no
-	 * element is added, the method only asserts that these 2 required
-	 * elements are set:
-	 *
-	 * 'start_url'   - Starting URL
-	 * 'start_depth' - Crawl depth for starting URL
-	 *
-	 * @param	$crawlData	Array with partial data for being queued (taken by reference)
-	 * @return	void
-	 * @todo	~10% done
-	 */
-	protected function enrichCrawlerQueueData (array &$crawlData) {
-		// Debug message
-		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
-
-		// Both mandatory elements must be set
-		assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL]));
-		assert(isset($crawlData[self::CRAWL_JOB_ARRAY_DEPTH]));
-
-		// @TODO Add more elements
-
-		// Debug message
-		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
-	}
-
-	/**
-	 * Pushes the given crawler array onto the stack 'urls' of the assigned
-	 * stack instance.
-	 *
-	 * @param	$crawlData	Array with partial data for being queued
-	 * @return	void
-	 */
-	protected function enqueueInFileStack (array $crawlData) {
-		// Debug message
-		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
-
-		// Fetch the assigned stack first ...
-		$urlStack = $this->getStackInstance();
-
-		// ... then push the crawl data onto the URL stack
-		$urlStack->pushNamed(self::STACKER_NAME_URLS, $crawlData);
-
-		// Debug message
-		//* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
-	}
-}
-
-// [EOF]
-?>