git.mxchange.org Git - hub.git/commitdiff
Continued:
authorRoland Häder <roland@mxchange.org>
Mon, 7 Dec 2020 06:46:54 +0000 (07:46 +0100)
committerRoland Häder <roland@mxchange.org>
Mon, 7 Dec 2020 07:33:04 +0000 (08:33 +0100)
- moved $stackSourceInstance to BaseUrlSource
- also introduced BaseUrlSource->initStacks()
- renamed UrlSource::CRAWL_JOB_ARRAY_* to URL_SOURCE_ARRAY_*
- added more debug lines

Signed-off-by: Roland Häder <roland@mxchange.org>
application/hub/classes/source/class_BaseUrlSource.php
application/hub/classes/source/urls/class_CrawlerUploadedListUrlSource.php
application/hub/interfaces/source/urls/class_UrlSource.php

index cae9a475a10dc96ae982a4655c535362d2895766..d51e6ea7657339bc3fcbee5a75a31ecaf901bdb3 100644 (file)
@@ -6,6 +6,7 @@ namespace Org\Shipsimu\Hub\Crawler\Source\Url;
 use Org\Shipsimu\Hub\Crawler\Source\BaseSource;
 
 // Import framework stuff
+use Org\Mxchange\CoreFramework\Factory\ObjectFactory;
 use Org\Mxchange\CoreFramework\Factory\Stack\FileStackFactory;
 use Org\Mxchange\CoreFramework\Traits\Stack\StackableTrait;
 
@@ -41,6 +42,11 @@ abstract class BaseUrlSource extends BaseSource {
        // Stack name for all URLs
        const STACKER_NAME_URLS = 'urls';
 
+       /**
+        * Stack for pushing data from this class to another
+        */
+       private $stackSourceInstance = NULL;
+
        /**
         * Protected constructor
         *
@@ -52,6 +58,38 @@ abstract class BaseUrlSource extends BaseSource {
                parent::__construct($className);
        }
 
+       /**
+        * Initializes given stack instances
+        *
+        * @param       $stacks         Stacks to initialize
+        * @param       $stackType      Type of the stack/URL source
+        * @return      void
+        */
+       protected function initStacks (array $stacks, string $stackType) {
+               // Init stack instance
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: stacks()=%d,stackType=%s - CALLED!', count($stacks), $stackType));
+               $this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName(sprintf('%s_url_source_stack_class', $stackType));
+
+               // Init stacks
+               foreach($stacks as $stackName) {
+                       // Init single stack
+                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: stackName=%s', $stackName));
+                       $this->getStackSourceInstance()->initStack($stackName);
+               }
+
+               // Trace message
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: EXIT!');
+       }
+
+       /**
+        * Getter for stackSourceInstance variable
+        *
+        * @return      $stackSourceInstance    An instance of an additional stack
+        */
+       public final function getStackSourceInstance () {
+               return $this->stackSourceInstance;
+       }
+
        /**
         * Initalizes this source
         *
@@ -114,15 +152,15 @@ abstract class BaseUrlSource extends BaseSource {
                if (count($crawlData) == 0) {
                        // Throw IAE
                        throw new InvalidArgumentException('Parameter "crawlData" has no elements');
-               } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_START_URL])) {
+               } elseif (!isset($crawlData[UrlSource::URL_SOURCE_ARRAY_START_URL])) {
                        // Throw IAE
-                       throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_START_URL));
-               } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_START_DEPTH])) {
+                       throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::URL_SOURCE_ARRAY_START_URL));
+               } elseif (!isset($crawlData[UrlSource::URL_SOURCE_ARRAY_START_DEPTH])) {
                        // Throw IAE
-                       throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_START_DEPTH));
-               } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH])) {
+                       throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::URL_SOURCE_ARRAY_START_DEPTH));
+               } elseif (!isset($crawlData[UrlSource::URL_SOURCE_ARRAY_EXTERNAL_DEPTH])) {
                        // Throw IAE
-                       throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH));
+                       throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::URL_SOURCE_ARRAY_EXTERNAL_DEPTH));
                }
 
                // @TODO Add more elements
index a3c48303cc78b5ee2503751b33f10806a781181c..aa6f52a1135803f5cc6fee59430075752f2d08d2 100644 (file)
@@ -68,11 +68,6 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
         */
        private $lastCsvFileInstance = NULL;
 
-       /**
-        * Stack for pushing data from this clas to another
-        */
-       private $stackSourceInstance = NULL;
-
        /**
         * "Imported" CSV files
         */
@@ -108,19 +103,6 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                // Set it here
                $this->setDirectoryInstance($directoryInstance);
 
-               // Init stack instance
-               $this->stackSourceInstance = ObjectFactory::createObjectByConfiguredName('crawler_uploaded_list_url_source_stack_class');
-
-               // Init stacks
-               foreach([
-                       self::STACK_NAME_CSV_FILE,
-                       self::STACK_NAME_CSV_ENTRY,
-               ] as $stackName) {
-                       // Init single stack
-                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: stackName=%s', $stackName));
-                       $this->getStackSourceInstance()->initStack($stackName);
-               }
-
                // "Cache" column separator
                $this->columnSeparator = FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('crawler_url_list_column_separator');
 
@@ -129,12 +111,22 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
        }
 
        /**
-        * Getter for stackSourceInstance variable
+        * Creates an instance of this class
         *
-        * @return      $stackSourceInstance    An instance of an additional stack
+        * @return      $sourceInstance         An instance of an UrlSource class
         */
-       public final function getStackSourceInstance () {
-               return $this->stackSourceInstance;
+       public final static function createCrawlerUploadedListUrlSource () {
+               // Get new instance
+               $sourceInstance = new CrawlerUploadedListUrlSource();
+
+               // Init source
+               $sourceInstance->initSource('crawler', 'uploaded_list');
+
+               // Init stacks
+               $sourceInstance->initStacks([self::STACK_NAME_CSV_FILE, self::STACK_NAME_CSV_ENTRY], 'crawler_uploaded_list');
+
+               // Return the prepared instance
+               return $sourceInstance;
        }
 
        /**
@@ -186,29 +178,13 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                $infoInstance = new SplFileInfo($this->csvFilePath . '/' . $directoryEntry);
 
                // Initialize CSV file instance
-               $this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_input_file_class', array($infoInstance));
+               $this->lastCsvFileInstance = ObjectFactory::createObjectByConfiguredName('csv_input_file_class', [$infoInstance]);
 
                // Found an entry
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: directoryEntry(' . strlen($directoryEntry) . ')=' . $directoryEntry . ' - Instance created - EXIT!');
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: directoryEntry(%d)=%s - Instance created - EXIT!', strlen($directoryEntry), $directoryEntry));
                return TRUE;
        }
 
-       /**
-        * Creates an instance of this class
-        *
-        * @return      $sourceInstance         An instance of an UrlSource class
-        */
-       public final static function createCrawlerUploadedListUrlSource () {
-               // Get new instance
-               $sourceInstance = new CrawlerUploadedListUrlSource();
-
-               // Init source
-               $sourceInstance->initSource('crawler', 'uploaded_list');
-
-               // Return the prepared instance
-               return $sourceInstance;
-       }
-
        /**
         * Enriches and saves the given CSV entry (array) in the assigned
         * file-based stack. To such entry a lot more informations are added, such
@@ -218,10 +194,8 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
         * @return      void
         */
        private function saveCsvDataInCrawlerQueue (array $csvData) {
-               // Debug message
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData()=' . count($csvData) . ' - CALLED!');
-
                // The array must have a fixed amount of elements, later enhancements may accept more
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData()=' . count($csvData) . ' - CALLED!');
                assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
 
                /*
@@ -230,20 +204,20 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                 * column to the CSV file.
                 */
                $csvArray = [
-                       UrlSource::CRAWL_JOB_ARRAY_START_URL      => $csvData[0],
-                       UrlSource::CRAWL_JOB_ARRAY_START_DEPTH    => $csvData[1],
-                       UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2]
+                       UrlSource::URL_SOURCE_ARRAY_START_URL      => $csvData[0],
+                       UrlSource::URL_SOURCE_ARRAY_START_DEPTH    => $csvData[1],
+                       UrlSource::URL_SOURCE_ARRAY_EXTERNAL_DEPTH => $csvData[2]
                ];
 
                // Then add more data to it
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=' . count($csvArray) . ' - BEFORE!');
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=%d - BEFORE!', count($csvArray)));
                $this->enrichCrawlerQueueData($csvArray);
 
                /*
                 * Then enqueue it in the file stack. The local crawler "task" will
                 * then pick this up.
                 */
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=' . count($csvArray) . ' - AFTER!');
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvArray()=%d - AFTER!', count($csvArray)));
                $this->enqueueInFileStack($csvArray);
 
                // Trace message
@@ -392,12 +366,14 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                         * A CSV file has been found and "imported" (added to stack). Now
                         * the file can be read line by line and checked every one of it.
                         */
+                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: Calling this->parseCsvEntry() ...');
                        $this->parseCsvEntry();
                } elseif ($this->isCsvFileAdded()) {
                        /*
                         * A CSV file has been found and "imported" (added to stack). Now
                         * the file can be read line by line and checked every one of it.
                         */
+                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: Calling this->parseCsvFile() ...');
                        $this->parseCsvFile();
                } elseif ($this->isCsvFileFound() && !$this->isLastCsvFileImported()) {
                        /*
@@ -405,6 +381,7 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                         * CSV-like as you may wish to provide meta data such as crawl
                         * depth, handling of 3rd-party URLs and such.
                         */
+                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: Calling this->addCsvFile() ...');
                        $this->addCsvFile();
                }
 
index 1fb816e4be218e449362763db8a2a94848ce6375..fefe8e3a0a20f980b54c457eb4f4a26c36a69ab9 100644 (file)
@@ -29,9 +29,9 @@ use Org\Shipsimu\Hub\Crawler\Source\Source;
  */
 interface UrlSource extends Source {
        // Array elements for CSV data array
-       const CRAWL_JOB_ARRAY_START_URL      = 'start_url';
-       const CRAWL_JOB_ARRAY_START_DEPTH    = 'start_depth';
-       const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth';
+       const URL_SOURCE_ARRAY_START_URL      = 'start_url';
+       const URL_SOURCE_ARRAY_START_DEPTH    = 'start_depth';
+       const URL_SOURCE_ARRAY_EXTERNAL_DEPTH = 'external_depth';
 
        /**
         * Fills the URL stack with new entries from source