use Org\Shipsimu\Hub\Crawler\Source\BaseSource;
// Import framework stuff
+use Org\Mxchange\CoreFramework\Factory\ObjectFactory;
use Org\Mxchange\CoreFramework\Factory\Stack\FileStackFactory;
use Org\Mxchange\CoreFramework\Traits\Stack\StackableTrait;
// Stack name for all URLs
const STACKER_NAME_URLS = 'urls';
+ /**
+ * Stack for pushing data from this class to another
+ */
+ private $stackSourceInstance = NULL;
+
/**
* Protected constructor
*
parent::__construct($className);
}
+ /**
+ * Initializes the given stacks on a newly created stack source instance
+ *
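+ * Example usage (taken from CrawlerUploadedListUrlSource::createCrawlerUploadedListUrlSource() below):
+ *
+ *   $sourceInstance->initStacks([self::STACK_NAME_CSV_FILE, self::STACK_NAME_CSV_ENTRY], 'crawler_uploaded_list');
+ *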
+ * @param $stacks Names of the stacks to initialize
+ * @param $stackType Type of the stack/URL source; used as prefix for the '*_url_source_stack_class' configuration entry
+ * @return void
+ */
+ protected function initStacks (array $stacks, string $stackType) {
+ // Trace message
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: stacks()=%d,stackType=%s - CALLED!', count($stacks), $stackType));
+
+ // Init stack instance from the configured '<stackType>_url_source_stack_class' entry
+ $this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName(sprintf('%s_url_source_stack_class', $stackType));
+
+ // Init stacks
+ foreach($stacks as $stackName) {
+ // Init single stack
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: stackName=%s', $stackName));
+ $this->getStackSourceInstance()->initStack($stackName);
+ }
+
+ // Trace message
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: EXIT!');
+ }
+
+ /**
+ * Getter for stackSourceInstance variable
+ *
+ * @return $stackSourceInstance An instance of an additional stack
+ */
+ public final function getStackSourceInstance () {
+ return $this->stackSourceInstance;
+ }
+
/**
* Initializes this source
*
if (count($crawlData) == 0) {
// Throw IAE
throw new InvalidArgumentException('Parameter "crawlData" has no elements');
- } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_START_URL])) {
+ } elseif (!isset($crawlData[UrlSource::URL_SOURCE_ARRAY_START_URL])) {
// Throw IAE
- throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_START_URL));
- } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_START_DEPTH])) {
+ throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::URL_SOURCE_ARRAY_START_URL));
+ } elseif (!isset($crawlData[UrlSource::URL_SOURCE_ARRAY_START_DEPTH])) {
// Throw IAE
- throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_START_DEPTH));
- } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH])) {
+ throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::URL_SOURCE_ARRAY_START_DEPTH));
+ } elseif (!isset($crawlData[UrlSource::URL_SOURCE_ARRAY_EXTERNAL_DEPTH])) {
// Throw IAE
- throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH));
+ throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::URL_SOURCE_ARRAY_EXTERNAL_DEPTH));
}
// @TODO Add more elements
*/
private $lastCsvFileInstance = NULL;
- /**
- * Stack for pushing data from this clas to another
- */
- private $stackSourceInstance = NULL;
-
/**
* "Imported" CSV files
*/
// Set it here
$this->setDirectoryInstance($directoryInstance);
- // Init stack instance
- $this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_url_source_stack_class');
-
- // Init stacks
- foreach([
- self::STACK_NAME_CSV_FILE,
- self::STACK_NAME_CSV_ENTRY,
- ] as $stackName) {
- // Init single stack
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: stackName=%s', $stackName));
- $this->getStackSourceInstance()->initStack($stackName);
- }
-
// "Cache" column separator
$this->columnSeparator = FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('crawler_url_list_column_separator');
}
/**
- * Getter for stackSourceInstance variable
+ * Creates an instance of this class
*
- * @return $stackSourceInstance An instance of an additional stack
+ * @return $sourceInstance An instance of a UrlSource class
*/
- public final function getStackSourceInstance () {
- return $this->stackSourceInstance;
+ public final static function createCrawlerUploadedListUrlSource () {
+ // Get new instance
+ $sourceInstance = new CrawlerUploadedListUrlSource();
+
+ // Init source
+ $sourceInstance->initSource('crawler', 'uploaded_list');
+
+ // Init stacks
+ $sourceInstance->initStacks([self::STACK_NAME_CSV_FILE, self::STACK_NAME_CSV_ENTRY], 'crawler_uploaded_list');
+
+ // Return the prepared instance
+ return $sourceInstance;
}
/**
$infoInstance = new SplFileInfo($this->csvFilePath . '/' . $directoryEntry);
// Initialize CSV file instance
- $this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_input_file_class', array($infoInstance));
+ $this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_input_file_class', [$infoInstance]);
// Found an entry
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - Instance created - EXIT!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: directoryEntry(%d)=%s - Instance created - EXIT!', strlen($directoryEntry), $directoryEntry));
return TRUE;
}
- /**
- * Creates an instance of this class
- *
- * @return $sourceInstance An instance of an UrlSource class
- */
- public final static function createCrawlerUploadedListUrlSource () {
- // Get new instance
- $sourceInstance = new CrawlerUploadedListUrlSource();
-
- // Init source
- $sourceInstance->initSource('crawler', 'uploaded_list');
-
- // Return the prepared instance
- return $sourceInstance;
- }
-
/**
* Enriches and saves the given CSV entry (array) in the assigned
* file-based stack. To such entry a lot more informations are added, such
* @return void
*/
private function saveCsvDataInCrawlerQueue (array $csvData) {
- // Debug message
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData()=' . count($csvData) . ' - CALLED!');
-
// The array must have a fixed amount of elements, later enhancements may accept more
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData()=%d - CALLED!', count($csvData)));
assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
/*
* column to the CSV file.
*/
$csvArray = [
- UrlSource::CRAWL_JOB_ARRAY_START_URL => $csvData[0],
- UrlSource::CRAWL_JOB_ARRAY_START_DEPTH => $csvData[1],
- UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2]
+ UrlSource::URL_SOURCE_ARRAY_START_URL => $csvData[0],
+ UrlSource::URL_SOURCE_ARRAY_START_DEPTH => $csvData[1],
+ UrlSource::URL_SOURCE_ARRAY_EXTERNAL_DEPTH => $csvData[2]
];
// Then add more data to it
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=' . count($csvArray) . ' - BEFORE!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=%d - BEFORE!', count($csvArray)));
$this->enrichCrawlerQueueData($csvArray);
/*
* Then enqueue it in the file stack. The local crawler "task" will
* then pick this up.
*/
- /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=' . count($csvArray) . ' - AFTER!');
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=%d - AFTER!', count($csvArray)));
$this->enqueueInFileStack($csvArray);
// Trace message
* A CSV file has been found and "imported" (added to stack). Now
* the file can be read line by line and every line of it checked.
*/
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: Calling this->parseCsvEntry() ...');
$this->parseCsvEntry();
} elseif ($this->isCsvFileAdded()) {
/*
* A CSV file has been found and "imported" (added to stack). Now
* the file can be read line by line and every line of it checked.
*/
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: Calling this->parseCsvFile() ...');
$this->parseCsvFile();
} elseif ($this->isCsvFileFound() && !$this->isLastCsvFileImported()) {
/*
* CSV-like as you may wish to provide meta data such as crawl
* depth, handling of 3rd-party URLs and such.
*/
+ /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: Calling this->addCsvFile() ...');
$this->addCsvFile();
}