*/
interface UrlSource extends Source {
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
*/
- function processStack ();
+ function fillUrlStack ();
+
+ /**
+ * Determines whether the stack 'urls' is empty.
+ *
+ * @return $isEmpty Whether the stack 'urls' is empty.
+ */
+ function isUrlStackEmpty ();
}
// [EOF]
$resultInstance = $nodeInstance->getWrapperInstance()->doSelectByCriteria($searchInstance);
// Is the result valid?
- if ((!$resultInstance->valid()) || (! $resultInstance->next())) {
+ if ((!$resultInstance->valid()) || (!$resultInstance->next())) {
// Node not found in database, this could mean that your database file is damaged.
return NULL;
} // END - if
*
* @return $isEmpty Whether the stack 'urls' is empty.
*/
- protected function isUrlStackEmpty () {
+ public function isUrlStackEmpty () {
// Determine it
$isEmpty = $this->getStackInstance()->isStackEmpty('urls');
// Return result
return $isEmpty;
}
+
+ /**
+ * Enriches the given associative array with more data, now at least 2
+ * elements are required:
+ *
+ * 'start_url' - Starting URL
+ * 'start_depth' - Crawl depth for starting URL
+ *
+ * The array is taken by reference, so any element added here ends up in
+ * the caller's array.
+ *
+ * @param $crawlData Array with partial data for being queued
+ * @return void
+ * @todo Still a stub: only the entry/exit debug messages are emitted, the
+ * array is neither checked for the required elements nor changed yet.
+ */
+ protected function enrichCrawlerQueueData (array &$crawlData) {
+ // Debug message (reports how many elements the given array holds)
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: crawlData()=' . count($crawlData) . ' - CALLED!');
+
+ // Debug message (marks the end of this method)
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
+ }
}
// [EOF]
}
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
- * @todo ~10% done
+ * @todo 0% done
*/
- public function processStack () {
- // Does the stack have some entries left?
- if ($this->isUrlStackEmpty()) {
- // Nothing to handle here
- return;
- } // END - if
-
+ public function fillUrlStack () {
$this->partialStub('Please implement this method.');
}
}
}
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
- * @todo ~10% done
+ * @todo 0% done
*/
- public function processStack () {
- // Does the stack have some entries left?
- if ($this->isUrlStackEmpty()) {
- // Nothing to handle here
- return;
- } // END - if
-
+ public function fillUrlStack () {
$this->partialStub('Please implement this method.');
}
}
}
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
- * @todo ~10% done
+ * @todo 0% done
*/
- public function processStack () {
- // Does the stack have some entries left?
- if ($this->isUrlStackEmpty()) {
- // Nothing to handle here
- return;
- } // END - if
-
+ public function fillUrlStack () {
$this->partialStub('Please implement this method.');
}
}
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
- /**
- * "Cached" CSV path
- */
- private $csvFilePath = '';
-
- /**
- * Last CSV file instance
- */
- private $lastCsvFileInstance = NULL;
-
- /**
- * Stack for pushing data from this clas to another
- */
- private $stackSourceInstance = NULL;
-
/**
* Stack name for a CSV file
*/
*/
const CRAWL_ENTRY_SIZE = 3;
+ /**
+ * "Cached" CSV path
+ */
+ private $csvFilePath = '';
+
+ /**
+ * Last CSV file instance
+ */
+ private $lastCsvFileInstance = NULL;
+
+ /**
+ * Stack for pushing data from this class to another
+ */
+ private $stackSourceInstance = NULL;
+
/**
* "Imported" CSV files
*/
self::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2]
);
+ // Then add more data to it
+ $this->enrichCrawlerQueueData($csvData);
+
// Debug message
/* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('CRAWLER-SOURCE [' . __METHOD__ . ':' . __LINE__ . ']: EXIT!');
}
}
/**
- * Processes entries in the stack.
+ * Fills the URL stack with new entries from source
*
* @return void
- * @todo ~20% done
+ * @todo ~40% done
*/
- public function processStack () {
+ public function fillUrlStack () {
// Does the stack have some entries left?
if ($this->isCsvEntryAdded()) {
/*
* depth, handling of 3rd-party URLs and such.
*/
$this->addCsvFile();
- } elseif (!$this->isUrlStackEmpty()) {
- /*
- * Handle next entry. This method will be called very often, so need
- * to process more than one entry at a time.
- */
- $this->processNextEntry();
}
$this->partialStub('Please implement this method.');
--- /dev/null
+<?php
+/**
+ * A general URL source Task
+ *
+ * @author Roland Haeder <webmaster@shipsimu.org>
+ * @version 0.0.0
+ * @copyright Copyright (c) 2007, 2008 Roland Haeder, 2009 - 2014 Hub Developer Team
+ * @license GNU GPL 3.0 or any newer version
+ * @link http://www.shipsimu.org
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+class BaseUrlSourceTask extends BaseTask {
+	/**
+	 * Protected constructor, passes the class name on to the parent
+	 * constructor and then attaches a URL source to this task.
+	 *
+	 * @param	$className	Name of the class
+	 * @return	void
+	 */
+	protected function __construct ($className) {
+		// Let the parent class do its part first
+		parent::__construct($className);
+
+		// Then prepare the URL source for this task
+		$this->initUrlSourceTask();
+	}
+
+	/**
+	 * Creates the URL source instance for this task through the factory and
+	 * stores it in this instance (kept separate so the constructor stays small)
+	 *
+	 * @return	void
+	 */
+	private function initUrlSourceTask () {
+		// Create the source instance for this task and set it here in one step
+		$this->setSourceInstance(UrlSourceObjectFactory::createUrlSourceInstance($this));
+	}
+}
+
+// [EOF]
+?>
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSource???Task extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSource???Task extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSourceFoundRssTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceFoundRssTask extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
*/
public function executeTask () {
// Get the URL source instance and announce us
- UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+ $this->getSourceInstance()->fillUrlStack();
}
}
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSourceLocalStartTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceLocalStartTask extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
*/
public function executeTask () {
// Get the URL source instance and announce us
- UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+ $this->getSourceInstance()->fillUrlStack();
}
}
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSourceRssStartTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceRssStartTask extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
*/
public function executeTask () {
// Get the URL source instance and announce us
- UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+ $this->getSourceInstance()->fillUrlStack();
}
}
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-class CrawlerUrlSourceUploadedListTask extends BaseTask implements Taskable, Visitable {
+class CrawlerUrlSourceUploadedListTask extends BaseUrlSourceTask implements Taskable, Visitable {
/**
* Protected constructor
*
*/
public function executeTask () {
// Get the URL source instance and announce us
- UrlSourceObjectFactory::createUrlSourceInstance($this)->processStack();
+ $this->getSourceInstance()->fillUrlStack();
}
}