From: Roland Häder Date: Mon, 7 Dec 2020 05:54:14 +0000 (+0100) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=b01ffc39c4d9aeb5c4d8f0192bed15722885de70;p=hub.git Continued: - moved BaseUrlSource::CRAWL_JOB_ARRAY_* to UrlSource interface - added more checks on parameter - replaced assert() with thrown exceptions Signed-off-by: Roland Häder --- diff --git a/application/hub/classes/recipient/direct/class_DirectRecipient.php b/application/hub/classes/recipient/direct/class_DirectRecipient.php index 712820ac2..76d24d546 100644 --- a/application/hub/classes/recipient/direct/class_DirectRecipient.php +++ b/application/hub/classes/recipient/direct/class_DirectRecipient.php @@ -13,8 +13,11 @@ use Org\Shipsimu\Hub\Tools\HubTools; use Org\Mxchange\CoreFramework\Bootstrap\FrameworkBootstrap; use Org\Mxchange\CoreFramework\Lists\Listable; +// Import SPL stuff +use \UnexpectedValueException; + /** - * A Direct recipient + * A direct recipient * * @author Roland Haeder * @version 0.0.0 @@ -65,30 +68,34 @@ class DirectRecipient extends BaseRecipient implements Recipient { * * @param $packageInstance An instance of a DeliverablePackage class * @param $listInstance An instance of a Listable class - * @return $resolved Resolved recipient or VOID if only the set list has been filled - * @throws FrameworkException Could throw different exceptions depending on implementation + * @return void + * @throws UnexpectedValueException If count of recipients is higher than maximum allowed */ public function resolveRecipientByPackageInstance (DeliverablePackage $packageInstance, Listable $listInstance) { - // Debug message - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('DIRECT-RECIPIENT: packageInstance=' . $packageInstance->__toString() . ',listInstance=' . $listInstance->__toString()); - // Get recipient UNL + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: packageInstance=%s,listInstance=%s', $packageInstance->__toString(), $listInstance->__toString())); $recipientUnl = $packageInstance->getRecipientUnl(); // "Explode" all recipients - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('DIRECT-RECIPIENT: recipientUnl=' . $recipientUnl); + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: recipientUnl=%s', $recipientUnl)); $recipients = explode(NetworkPackageHandler::PACKAGE_RECIPIENT_SEPARATOR, $recipientUnl); // Is maximum reached? - assert(count($recipients) <= FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count')); + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: recipients()=%d', count($recipients))); + if (count($recipients) > FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count')) { + // To much recipients + throw new UnexpectedValueException(sprintf('recipients()=%d has more than %d expected records.', count($recipients), FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count'))); + } - // Try it on all + // Try to resolve sessionId to UNL data array and add it to list + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: Adding %d recipients ...', count($recipients))); foreach ($recipients as $recipient) { // Try to sole a single recipient - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('DIRECT-RECIPIENT: recipient=' . $recipient); + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: recipient=%s', $recipient)); $unlData = HubTools::resolveSessionIdToUnl($recipient); // Add it as recipient + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('DIRECT-RECIPIENT: unlData[]=%s', gettype($unlData))); $listInstance->addEntry('unl', $unlData); } diff --git a/application/hub/classes/recipient/self/class_SelfRecipient.php b/application/hub/classes/recipient/self/class_SelfRecipient.php index 989745d66..d65a7a79c 100644 --- a/application/hub/classes/recipient/self/class_SelfRecipient.php +++ b/application/hub/classes/recipient/self/class_SelfRecipient.php @@ -12,7 +12,7 @@ use Org\Shipsimu\Hub\Tools\HubTools; use Org\Mxchange\CoreFramework\Lists\Listable; // Import SPL stuff -use \InvalidArgumentException; +use \UnexpectedValueException; /** * A Self recipient @@ -66,21 +66,24 @@ class SelfRecipient extends BaseRecipient implements Recipient { * * @param $packageInstance An instance of a DeliverablePackage class * @param $listInstance An instance of a Listable class - * @return $resolved Resolved recipient or VOID if only the set list has been filled - * @throws FrameworkException Could throw different exceptions depending on implementation + * @return void + * @throws UnexpectedValueException If the returned UNL is empty */ public function resolveRecipientByPackageInstance (DeliverablePackage $packageInstance, Listable $listInstance) { // Determine IP or 'external_address' if set /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('SELF-RECIPIENT: packageInstance=%s,listInstance=%s', $packageInstance->__toString(), $listInstance->__toString())); $unl = HubTools::determineOwnExternalAddress(); - // Is it not empty? + // Is it empty? /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('SELF-RECIPIENT: unl(%d)=%s', strlen($unl), $unl)); - if (!empty($unl)) { - // Add it to the list - $listInstance->addEntry('unl', $unl); + if (empty($unl)) { + // Avoid adding those UNLs + throw new UnexpectedValueException('Returned UNL is empty'); } + // Add it to the list + $listInstance->addEntry('unl', $unl); + // Trace message /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('SELF-RECIPIENT: EXIT!'); } diff --git a/application/hub/classes/recipient/upper/class_UpperRecipient.php b/application/hub/classes/recipient/upper/class_UpperRecipient.php index c81c827cd..fa5760e9a 100644 --- a/application/hub/classes/recipient/upper/class_UpperRecipient.php +++ b/application/hub/classes/recipient/upper/class_UpperRecipient.php @@ -84,19 +84,17 @@ class UpperRecipient extends BaseRecipient implements Recipient { // Is maximum reached? if ($listInstance->count() == FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count')) { // Then stop adding more - /* DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: Going to abort at maximum of ' . FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count') . ' recipients!'); + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: Going to abort at maximum of ' . FrameworkBootstrap::getConfigurationInstance()->getConfigEntry('package_recipient_max_count') . ' recipients!'); break; } - // Debug message - /* DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: Adding node ' . print_r($unlData, TRUE) . ' as recipient.'); - // Add the entry + /* PRINTR-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: Adding node ' . print_r($unlData, TRUE) . ' as recipient.'); $listInstance->addEntry('unl', $unlData); } // Trace message - /* DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: EXIT!'); + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('UPPER-RECIPIENT: EXIT!'); } } diff --git a/application/hub/classes/source/class_BaseUrlSource.php b/application/hub/classes/source/class_BaseUrlSource.php index f27278fc7..cae9a475a 100644 --- a/application/hub/classes/source/class_BaseUrlSource.php +++ b/application/hub/classes/source/class_BaseUrlSource.php @@ -9,6 +9,9 @@ use Org\Shipsimu\Hub\Crawler\Source\BaseSource; use Org\Mxchange\CoreFramework\Factory\Stack\FileStackFactory; use Org\Mxchange\CoreFramework\Traits\Stack\StackableTrait; +// Import SPL stuff +use \InvalidArgumentException; + /** * A general URL source class * @@ -38,11 +41,6 @@ abstract class BaseUrlSource extends BaseSource { // Stack name for all URLs const STACKER_NAME_URLS = 'urls'; - // Array elements for CSV data array - const CRAWL_JOB_ARRAY_START_URL = 'start_url'; - const CRAWL_JOB_ARRAY_DEPTH = 'start_depth'; - const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth'; - /** * Protected constructor * @@ -60,10 +58,18 @@ abstract class BaseUrlSource extends BaseSource { * @param $prefix Prefix for this source * @param $sourceName Name of this source * @return void + * @throws InvalidArgumentException If a parameter is not valid */ - protected function initSource ($prefix, $sourceName) { - // Trace message + protected function initSource (string $prefix, string $sourceName) { + // Validate parameter /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: prefix=' . $prefix . ',sourceName=' . $sourceName . ' - CALLED!'); + if (empty($prefix)) { + // Throw IAE + throw new InvalidArgumentException('Parameter "prefix" is empty'); + } elseif (empty($sourceName)) { + // Throw IAE + throw new InvalidArgumentException('Parameter "sourceName" is empty'); + } // Use another object factory $stackInstance = FileStackFactory::createFileStackInstance($prefix . '_url', $sourceName); @@ -82,9 +88,11 @@ abstract class BaseUrlSource extends BaseSource { */ public function isUrlStackEmpty () { // Determine it + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: CALLED!'); $isEmpty = $this->getStackInstance()->isStackEmpty(self::STACKER_NAME_URLS); // Return result + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: isEmpty=%d - EXIT!', intval($isEmpty))); return $isEmpty; } @@ -97,15 +105,25 @@ abstract class BaseUrlSource extends BaseSource { * * @param $crawlData Array with partial data for being queued * @return void + * @throws InvalidArgumentException If a parameter is not valid * @todo ~1% done */ protected function enrichCrawlerQueueData (array &$crawlData) { - // Debug message - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: crawlData()=' . count($crawlData) . ' - CALLED!'); - // Check for minimum array elements - assert(isset($crawlData[self::CRAWL_JOB_ARRAY_START_URL])); - assert(isset($crawlData[self::CRAWL_JOB_ARRAY_DEPTH])); + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: crawlData()=%d - CALLED!', count($crawlData))); + if (count($crawlData) == 0) { + // Throw IAE + throw new InvalidArgumentException('Parameter "crawlData" has no elements'); + } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_START_URL])) { + // Throw IAE + throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_START_URL)); + } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_START_DEPTH])) { + // Throw IAE + throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_START_DEPTH)); + } elseif (!isset($crawlData[UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH])) { + // Throw IAE + throw new InvalidArgumentException(sprintf('crawlData()=%d does not contain element "%s"', count($crawlData), UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH)); + } // @TODO Add more elements @@ -118,10 +136,15 @@ abstract class BaseUrlSource extends BaseSource { * * @param $crawlData Array with partial data for being queued * @return void + * @throws InvalidArgumentException If a parameter is not valid */ protected function enqueueInFileStack (array $crawlData) { - // Debug message - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('BASE-URL-SOURCE: crawlData()=' . count($crawlData) . ' - CALLED!'); + // Validate parameter + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('BASE-URL-SOURCE: crawlData()=%d - CALLED!', count($crawlData))); + if (count($crawlData) == 0) { + // Throw IAE + throw new InvalidArgumentException('Parameter "crawlData" has no elements'); + } // Get the stack instance and enqueue it $this->getStackInstance()->pushNamed(self::STACKER_NAME_URLS, $crawlData); diff --git a/application/hub/classes/source/urls/class_CrawlerUploadedListUrlSource.php b/application/hub/classes/source/urls/class_CrawlerUploadedListUrlSource.php index 2da427c27..a3c48303c 100644 --- a/application/hub/classes/source/urls/class_CrawlerUploadedListUrlSource.php +++ b/application/hub/classes/source/urls/class_CrawlerUploadedListUrlSource.php @@ -230,9 +230,9 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R * column to the CSV file. */ $csvArray = [ - self::CRAWL_JOB_ARRAY_START_URL => $csvData[0], - self::CRAWL_JOB_ARRAY_DEPTH => $csvData[1], - self::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2] + UrlSource::CRAWL_JOB_ARRAY_START_URL => $csvData[0], + UrlSource::CRAWL_JOB_ARRAY_START_DEPTH => $csvData[1], + UrlSource::CRAWL_JOB_ARRAY_EXTERNAL_DEPTH => $csvData[2] ]; // Then add more data to it @@ -334,10 +334,10 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData[%s]=%s', gettype($csvData), print_r($csvData, TRUE))); if (count($csvData) == 0) { // Try to close it by actually unsetting (destructing) it - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: File "%s" has been fully read.', $csvFileInstance->getFilename())); unset($csvFileInstance); // This file as been fully read, so don't push it back on stack. + /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput(sprintf('CRAWLER-UPLOADED-LIST-URL-SOURCE: File "%s" has been fully read. - EXIT!', $csvFileInstance->getFilename())); return; } @@ -368,7 +368,7 @@ class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, R $csvData = $this->getStackSourceInstance()->popNamed(self::STACK_NAME_CSV_ENTRY); // It must have a fixed amount of elements (see method parseCsvFile() for details) - /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE)); + /* PRINTR-DEBUG: */ self::createDebugInstance(__CLASS__, __LINE__)->debugOutput('CRAWLER-UPLOADED-LIST-URL-SOURCE: csvData[' . gettype($csvData) . ']=' . print_r($csvData, TRUE)); assert(count($csvData) == self::CRAWL_ENTRY_SIZE); // Save it in crawler queue (which will enrich it with way more informations diff --git a/application/hub/interfaces/source/urls/class_UrlSource.php b/application/hub/interfaces/source/urls/class_UrlSource.php index 1ad7a3097..1fb816e4b 100644 --- a/application/hub/interfaces/source/urls/class_UrlSource.php +++ b/application/hub/interfaces/source/urls/class_UrlSource.php @@ -28,6 +28,11 @@ use Org\Shipsimu\Hub\Crawler\Source\Source; * along with this program. If not, see . */ interface UrlSource extends Source { + // Array elements for CSV data array + const CRAWL_JOB_ARRAY_START_URL = 'start_url'; + const CRAWL_JOB_ARRAY_START_DEPTH = 'start_depth'; + const CRAWL_JOB_ARRAY_EXTERNAL_DEPTH = 'external_depth'; + /** * Fills the URL stack with new entries from source *