]> git.mxchange.org Git - hub.git/blob - application/hub/main/source/urls/class_CrawlerUploadedListUrlSource.php
Continued:
[hub.git] / application / hub / main / source / urls / class_CrawlerUploadedListUrlSource.php
1 <?php
2 /**
3  * An UploadedList URL source class for crawlers
4  *
5  * @author              Roland Haeder <webmaster@ship-simu.org>
6  * @version             0.0.0
7  * @copyright   Copyright (c) 2014 Crawler Developer Team
8  * @license             GNU GPL 3.0 or any newer version
9  * @link                http://www.ship-simu.org
10  *
11  * This program is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation, either version 3 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
23  */
class CrawlerUploadedListUrlSource extends BaseUrlSource implements UrlSource, Registerable {
        /**
         * Path that is searched for CSV files. It is assembled once in the
         * constructor from the configuration entries 'base_path' and
         * 'crawler_csv_file_path'.
         */
        private $csvFilesPath = '';

        /**
         * Last found CSV file (not read or written by any method of this
         * class yet)
         */
        private $lastCsvFile = '';

        /**
         * Protected constructor, use createCrawlerUploadedListUrlSource() to
         * obtain an instance.
         *
         * Builds the CSV files path from configuration and registers a
         * directory instance (created from the configured 'directory_class')
         * for that path.
         *
         * @return      void
         */
        protected function __construct () {
                // Hand this class' name over to the parent constructor
                parent::__construct(__CLASS__);

                // Both parts of the path come from configuration
                $configInstance = $this->getConfigInstance();
                $basePath       = $configInstance->getConfigEntry('base_path');
                $csvSubPath     = $configInstance->getConfigEntry('crawler_csv_file_path');

                // Remember the full path, it is needed again for every found file
                $this->csvFilesPath = $basePath . '/' . $csvSubPath;

                // Create the directory instance for that path and keep it
                $this->setDirectoryInstance(ObjectFactory::createObjectByConfiguredName('directory_class', array($this->csvFilesPath)));
        }

        /**
         * Reads one entry from the CSV directory and checks whether it is a
         * CSV file. Only names ending with the (lower-case) extension '.csv'
         * are accepted. On success a CSV file instance (created from the
         * configured 'csv_file_class') is set in this object.
         *
         * Every call reads at most one directory entry, so a FALSE result
         * only refers to the entry examined by this call.
         *
         * @return      boolean         TRUE if the examined entry is a CSV file, FALSE otherwise
         */
        private function isCsvFileFound () {
                $directoryInstance = $this->getDirectoryInstance();

                // Rewind the iterator if it reports no valid current entry
                $iteratorInstance = $directoryInstance->getDirectoryIteratorInstance();
                if (!$iteratorInstance->valid()) {
                        $iteratorInstance->rewind();
                }

                // Ask for the next entry, the listed names are passed as exceptions
                $entryName = $directoryInstance->readDirectoryExcept(array('.htaccess', '.', '..'));

                // Report what has been read
                /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($entryName) . ')=' . $entryName);

                // A usable entry is non-empty and ends with '.csv'
                if ((!empty($entryName)) && (substr($entryName, -4, 4) == '.csv')) {
                        // Wrap the file into a CSV instance and keep it for importing
                        $this->setCsvFileInstance(ObjectFactory::createObjectByConfiguredName('csv_file_class', array($this->csvFilesPath . '/' . $entryName)));

                        // This entry is a CSV file
                        return TRUE;
                }

                // Nothing usable was read, report it and give up for this call
                /* NOISY-DEBUG: */ self::createDebugInstance(__CLASS__)->debugOutput('SOURCE[' . __METHOD__ . ':' . __LINE__ . '] directoryEntry(' . strlen($entryName) . ')=' . $entryName . ' - SKIPPED!');
                return FALSE;
        }

        /**
         * Factory method: creates an instance of this class and initializes
         * it as source 'uploaded_list' of the 'crawler' prefix.
         *
         * @return      $sourceInstance         An instance of a Source class
         */
        public final static function createCrawlerUploadedListUrlSource () {
                // The constructor is protected, so instances are only created here
                $instance = new self();

                // Initialize the source
                $instance->initSource('crawler', 'uploaded_list');

                // Get a ??? @TODO

                // Hand the prepared instance back to the caller
                return $instance;
        }

        /**
         * Processes entries in the stack. When the URL stack still has
         * entries, the next one is handled. Otherwise the CSV directory is
         * checked and a found CSV file is imported.
         *
         * @return      void
         * @todo        ~10% done
         */
        public function processStack () {
                if ($this->isUrlStackEmpty()) {
                        // Nothing left on the stack, so look for an uploaded list
                        if ($this->isCsvFileFound()) {
                                /*
                                 * A file containing an URL list is found. Please note
                                 * the format is CSV-like as you may wish to provide meta
                                 * data such as crawl depth, handling of 3rd-party URLs
                                 * and such.
                                 */
                                $this->importCsvFile();
                        }
                } else {
                        /*
                         * Handle the next entry; exactly one entry is processed per
                         * call. NOTE(review): the previous comment stated "so need
                         * to process more than one entry at a time", presumably
                         * "no need" was meant as this method is called very
                         * often - please confirm.
                         */
                        $this->processNextEntry();
                }

                // This method is still unfinished, so flag it on every call
                $this->partialStub('Please implement this method.');
        }
}
133
134 // [EOF]
135 ?>