git.mxchange.org Git - hub.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Mon, 7 Dec 2020 05:54:14 +0000 (06:54 +0100)
committer Roland Häder <roland@mxchange.org>
Mon, 7 Dec 2020 05:54:14 +0000 (06:54 +0100)
- moved BaseUrlSource::CRAWL_JOB_ARRAY_* to UrlSource interface
- added more checks on parameter
- replaced assert() with thrown exceptions

Signed-off-by: Roland Häder <roland@mxchange.org>
application/hub/classes/recipient/direct/class_DirectRecipient.php
application/hub/classes/recipient/self/class_SelfRecipient.php
application/hub/classes/recipient/upper/class_UpperRecipient.php
application/hub/classes/source/class_BaseUrlSource.php
application/hub/classes/source/urls/class_CrawlerUploadedListUrlSource.php
application/hub/interfaces/source/urls/class_UrlSource.php

index 712820ac231cfc3eef688ac9f5db86b193aa36d5..76d24d546fdc86d7774333f36e981c6649a36f9f 100644 (file)
@@ -13,8 +13,11 @@ use Org\Shipsimu\Hub\Tools\HubTools;
 use Org\Mxchange\CoreFramework\Bootstrap\FrameworkBootstrap;
 use Org\Mxchange\CoreFramework\Lists\Listable;
 
+// Import SPL stuff
+use \UnexpectedValueException;
+
 /**
- * A Direct recipient
+ * A direct recipient
  *
  * @author             Roland Haeder <webmaster@shipsimu.org>
  * @version            0.0.0
@@ -65,30 +68,34 @@ class DirectRecipient extends BaseRecipient implements Recipient {
         *
         * @param       $packageInstance        An instance of a DeliverablePackage class
         * @param       $listInstance           An instance of a Listable class
-        * @return      $resolved                       Resolved recipient or VOID if only the set list has been filled
-        * @throws      FrameworkException      Could throw different exceptions depending on implementation
+        * @return      void
+        * @throws      UnexpectedValueException        If count of recipients is higher than maximum allowed
         */
        public function resolveRecipientByPackageInstance (DeliverablePackage $packageInstance, Listable $listInstance) {
-               // Debug message
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('DIRECT-RECIPIENT: packageInstance=' . $packageInstance->__toString() . ',listInstance=' . $listInstance->__toString());
-
                // Get recipient UNL
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: packageInstance=%s,listInstance=%s', $packageInstance->__toString(), $listInstance->__toString()));
                $recipientUnl = $packageInstance->getRecipientUnl();
 
                // "Explode" all recipients
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('DIRECT-RECIPIENT: recipientUnl=' . $recipientUnl);
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: recipientUnl=%s', $recipientUnl));
                $recipients = explode(NetworkPackageHandler::PACKAGE_RECIPIENT_SEPARATOR, $recipientUnl);
 
                // Is maximum reached?
-               assert(count($recipients) <= FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count'));
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: recipients()=%d', count($recipients)));
+               if (count($recipients) > FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count')) {
+                       // Too many recipients
+                       throw new UnexpectedValueException(sprintf('recipients()=%d has more than %d expected records.', count($recipients), FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count')));
+               }
 
-               // Try it on all
+               // Try to resolve sessionId to UNL data array and add it to list
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: Adding %d recipients ...', count($recipients)));
                foreach ($recipients as $recipient) {
                        // Try to sole a single recipient
-                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('DIRECT-RECIPIENT: recipient=' . $recipient);
+                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: recipient=%s', $recipient));
                        $unlData = HubTools::resolveSessionIdToUnl($recipient);
 
                        // Add it as recipient
+                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: unlData[]=%s', gettype($unlData)));
                        $listInstance->addEntry('unl', $unlData);
                }
 
index 989745d6624b546e79760311059dd7fce832d5c6..d65a7a79cd71a00027ff0fc910bb66e46a451aad 100644 (file)
@@ -12,7 +12,7 @@ use Org\Shipsimu\Hub\Tools\HubTools;
 use Org\Mxchange\CoreFramework\Lists\Listable;
 
 // Import SPL stuff
-use \InvalidArgumentException;
+use \UnexpectedValueException;
 
 /**
  * A Self recipient
@@ -66,21 +66,24 @@ class SelfRecipient extends BaseRecipient implements Recipient {
         *
         * @param       $packageInstance        An instance of a DeliverablePackage class
         * @param       $listInstance           An instance of a Listable class
-        * @return      $resolved                       Resolved recipient or VOID if only the set list has been filled
-        * @throws      FrameworkException      Could throw different exceptions depending on implementation
+        * @return      void
+        * @throws      UnexpectedValueException        If the returned UNL is empty
         */
        public function resolveRecipientByPackageInstance (DeliverablePackage $packageInstance, Listable $listInstance) {
                // Determine IP or 'external_address' if set
                /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('SELF-RECIPIENT: packageInstance=%s,listInstance=%s', $packageInstance->__toString(), $listInstance->__toString()));
                $unl = HubTools::determineOwnExternalAddress();
 
-               // Is it not empty?
+               // Is it empty?
                /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('SELF-RECIPIENT: unl(%d)=%s', strlen($unl), $unl));
-               if (!empty($unl)) {
-                       // Add it to the list
-                       $listInstance->addEntry('unl', $unl);
+               if (empty($unl)) {
+                       // Avoid adding those UNLs
+                       throw new UnexpectedValueException('Returned UNL is empty');
                }
 
+               // Add it to the list
+               $listInstance->addEntry('unl', $unl);
+
                // Trace message
                /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('SELF-RECIPIENT: EXIT!');
        }
index c81c827cd11436e449dc39cec24ee172004ce1db..fa5760e9af6c1fa1cb3a11d9c0cfd9607ec32f29 100644 (file)
@@ -84,19 +84,17 @@ class UpperRecipient extends BaseRecipient implements Recipient {
                        // Is maximum reached?
                        if ($listInstance->count() == FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count')) {
                                // Then stop adding more
-                               /* DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: Going to abort at maximum of ' . FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count') . ' recipients!');
+                               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: Going to abort at maximum of ' . FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count') . ' recipients!');
                                break;
                        }
 
-                       // Debug message
-                       /* DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: Adding node ' . print_r($unlData, TRUE) . ' as recipient.');
-
                        // Add the entry
+                       /* PRINTR-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: Adding node ' . print_r($unlData, TRUE) . ' as recipient.');
                        $listInstance->addEntry('unl', $unlData);
                }
 
                // Trace message
-               /* DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: EXIT!');
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: EXIT!');
        }
 
 }
index f27278fc7ba03081e86c4be752a458395ad1a12f..cae9a475a10dc96ae982a4655c535362d2895766 100644 (file)
@@ -9,6 +9,9 @@ use Org\Shipsimu\Hub\Crawler\Source\BaseSource;
 use Org\Mxchange\CoreFramework\Factory\Stack\FileStackFactory;
 use Org\Mxchange\CoreFramework\Traits\Stack\StackableTrait;
 
+// Import SPL stuff
+use \InvalidArgumentException;
+
 /**
  * A general URL source class
  *
@@ -38,11 +41,6 @@ abstract class BaseUrlSource extends BaseSource {
        // Stack name for all URLs
        const STACKER_NAME_URLS = 'urls';
 
-       // Array elements for CSV data array
-       const CRAWL_JOB_ARRAY_START_URL      = 'start_url';
-       const CRAWL_JOB_ARRAY_DEPTH          = 'start_depth';
-       const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth';
-
        /**
         * Protected constructor
         *
@@ -60,10 +58,18 @@ abstract class BaseUrlSource extends BaseSource {
         * @param       $prefix                 Prefix for this source
         * @param       $sourceName             Name of this source
         * @return      void
+        * @throws      InvalidArgumentException        If a parameter is not valid
         */
-       protected function initSource ($prefix, $sourceName) {
-               // Trace message
+       protected function initSource (string $prefix, string $sourceName) {
+               // Validate parameter
                /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: prefix=' . $prefix . ',sourceName=' . $sourceName . ' - CALLED!');
+               if (empty($prefix)) {
+                       // Throw IAE
+                       throw new InvalidArgumentException('Parameter "prefix" is empty');
+               } elseif (empty($sourceName)) {
+                       // Throw IAE
+                       throw new InvalidArgumentException('Parameter "sourceName" is empty');
+               }
 
                // Use another object factory
                $stackInstance = FileStackFactory::createFileStackInstance($prefix . '_url', $sourceName);
@@ -82,9 +88,11 @@ abstract class BaseUrlSource extends BaseSource {
         */
        public function isUrlStackEmpty () {
                // Determine it
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: CALLED!');
                $isEmpty = $this->getStackInstance()->isStackEmpty(self::STACKER_NAME_URLS);
 
                // Return result
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: isEmpty=%d - EXIT!', intval($isEmpty)));
                return $isEmpty;
        }
 
@@ -97,15 +105,25 @@ abstract class BaseUrlSource extends BaseSource {
         *
         * @param       $crawlData      Array with partial data for being queued
         * @return      void
+        * @throws      InvalidArgumentException        If a parameter is not valid
         * @todo        ~1% done
         */
        protected function enrichCrawlerQueueData (array &$crawlData) {
-               // Debug message
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: crawlData()=' . count($crawlData) . ' - CALLED!');
-
                // Check for minimum array elements
-               assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL]));
-               assert(isset($crawlData[self::CRAWL_JOB_ARRAY_DEPTH]));
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: crawlData()=%d - CALLED!', count($crawlData)));
+               if (count($crawlData) == 0) {
+                       // Throw IAE
+                       throw new InvalidArgumentException('Parameter "crawlData" has no elements');
+               } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_START_URL])) {
+                       // Throw IAE
+                       throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_START_URL));
+               } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_START_DEPTH])) {
+                       // Throw IAE
+                       throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_START_DEPTH));
+               } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH])) {
+                       // Throw IAE
+                       throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH));
+               }
 
                // @TODO Add more elements
 
@@ -118,10 +136,15 @@ abstract class BaseUrlSource extends BaseSource {
         *
         * @param       $crawlData      Array with partial data for being queued
         * @return      void
+        * @throws      InvalidArgumentException        If a parameter is not valid
         */
        protected function enqueueInFileStack (array $crawlData) {
-               // Debug message
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: crawlData()=' . count($crawlData) . ' - CALLED!');
+               // Validate parameter
+               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: crawlData()=%d - CALLED!', count($crawlData)));
+               if (count($crawlData) == 0) {
+                       // Throw IAE
+                       throw new InvalidArgumentException('Parameter "crawlData" has no elements');
+               }
 
                // Get the stack instance and enqueue it
                $this->getStackInstance()->pushNamed(self::STACKER_NAME_URLS, $crawlData);
index 2da427c2727d5f885c843893ec0b99bcc9106a7d..a3c48303cc78b5ee2503751b33f10806a781181c 100644 (file)
@@ -230,9 +230,9 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                 * column to the CSV file.
                 */
                $csvArray = [
-                       self::CRAWL_JOB_ARRAY_START_URL      => $csvData[0],
-                       self::CRAWL_JOB_ARRAY_DEPTH          => $csvData[1],
-                       self::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2]
+                       UrlSource::CRAWL_JOB_ARRAY_START_URL      => $csvData[0],
+                       UrlSource::CRAWL_JOB_ARRAY_START_DEPTH    => $csvData[1],
+                       UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2]
                ];
 
                // Then add more data to it
@@ -334,10 +334,10 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData[%s]=%s', gettype($csvData), print_r($csvData, TRUE)));
                if (count($csvData) == 0) {
                        // Try to close it by actually unsetting (destructing) it
-                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: File "%s" has been fully read.', $csvFileInstance->getFilename()));
                        unset($csvFileInstance);
 
                        // This file as been fully read, so don't push it back on stack.
+                       /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: File "%s" has been fully read. - EXIT!', $csvFileInstance->getFilename()));
                        return;
                }
 
@@ -368,7 +368,7 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R
                $csvData = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_ENTRY);
 
                // It must have a fixed amount of elements (see method parseCsvFile() for details)
-               /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
+               /* PRINTR-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE));
                assert(count($csvData) == self::CRAWL_ENTRY_SIZE);
 
                // Save it in crawler queue (which will enrich it with way more informations
index 1ad7a309758a23cf3861b387c428515423b4b5e9..1fb816e4be218e449362763db8a2a94848ce6375 100644 (file)
@@ -28,6 +28,11 @@ use Org\Shipsimu\Hub\Crawler\Source\Source;
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
 interface UrlSource extends Source {
+       // Array elements for CSV data array
+       const CRAWL_JOB_ARRAY_START_URL      = 'start_url';
+       const CRAWL_JOB_ARRAY_START_DEPTH    = 'start_depth';
+       const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth';
+
        /**
         * Fills the URL stack with new entries from source
         *