]> git.mxchange.org Git - simgear.git/blob - simgear/package/untar.cxx
ee8b717806d632486f4814b668de8ab8fbc576e7
[simgear.git] / simgear / package / untar.cxx
1 // Copyright (C) 2016  James Turner - <zakalawe@mac.com>
2 //
3 // This library is free software; you can redistribute it and/or
4 // modify it under the terms of the GNU Library General Public
5 // License as published by the Free Software Foundation; either
6 // version 2 of the License, or (at your option) any later version.
7 //
8 // This library is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 // Library General Public License for more details.
12 //
13 // You should have received a copy of the GNU General Public License
14 // along with this program; if not, write to the Free Software
15 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
16 //
17
18 #include "untar.hxx"
19
20 #include <cstdlib>
21 #include <cassert>
22 #include <stdint.h>
23 #include <cstring>
24 #include <cstddef>
25
26 #include <zlib.h>
27
28 #include <simgear/io/sg_file.hxx>
29 #include <simgear/misc/sg_dir.hxx>
30
31 #include <simgear/debug/logstream.hxx>
32
33 namespace simgear
34 {
35
36 namespace pkg
37 {
38
    // Size in bytes of the scratch buffer that inflate() writes decompressed
    // output into before it is handed to the tar parser.
    const int ZLIB_DECOMPRESS_BUFFER_SIZE = 32 * 1024;
    // Window-bits value passed to inflateInit2(); MAX_WBITS is supplied by zlib.h.
    const int ZLIB_INFLATE_WINDOW_BITS = MAX_WBITS;
    // OR-ed into the window bits given to inflateInit2(), asking zlib to
    // expect a gzip header on the input stream.
    const int ZLIB_DECODE_GZIP_HEADER = 16;
42
/**
 * Byte layout of a tar header block as laid down by POSIX 1003.1-1990.
 *
 * Every member is a plain char array (or a single char), so the structure has
 * no padding and the trailing comments give each member's byte offset within
 * the block.
 */
struct UstarHeaderBlock
{
    char fileName[100];           /*   0 */
    char mode[8];                 /* 100 */
    char uid[8];                  /* 108 */
    char gid[8];                  /* 116 */
    char size[12];                /* 124 */
    char mtime[12];               /* 136 */
    char chksum[8];               /* 148 */
    char typeflag;                /* 156 */
    char linkname[100];           /* 157 */
    char magic[6];                /* 257 */
    char version[2];              /* 263 */
    char uname[32];               /* 265 */
    char gname[32];               /* 297 */
    char devmajor[8];             /* 329 */
    char devminor[8];             /* 337 */
    char prefix[155];             /* 345 */
};
64
    // Size in bytes of one tar block: the header buffer is exactly this large
    // and file content is padded up to a multiple of it.
    const size_t TAR_HEADER_BLOCK_SIZE = 512;
66
/* Expected contents of the header's magic and version fields, with the
   number of bytes to compare for each.  */
#define TMAGIC   "ustar"        /* ustar and a null */
#define TMAGLEN  6
#define TVERSION "00"           /* 00 and no null */
#define TVERSLEN 2

    /* Values used in typeflag field.  Only REGTYPE, AREGTYPE and DIRTYPE are
       handled by the extractor in this file; any other value is reported as
       an unsupported entry.  */
#define REGTYPE  '0'            /* regular file */
#define AREGTYPE '\0'           /* regular file */
#define LNKTYPE  '1'            /* link */
#define SYMTYPE  '2'            /* reserved */
#define CHRTYPE  '3'            /* character special */
#define BLKTYPE  '4'            /* block special */
#define DIRTYPE  '5'            /* directory */
#define FIFOTYPE '6'            /* FIFO special */
#define CONTTYPE '7'            /* reserved */
82
83 class TarExtractorPrivate
84 {
85 public:
86     typedef enum {
87         INVALID = 0,
88         READING_HEADER,
89         READING_FILE,
90         READING_PADDING,
91         PRE_END_OF_ARCHVE,
92         END_OF_ARCHIVE,
93         ERROR_STATE, ///< states above this are error conditions
94         BAD_ARCHIVE,
95         BAD_DATA
96     } State;
97
98     SGPath path;
99     State state;
100     union {
101         UstarHeaderBlock header;
102         uint8_t headerBytes[TAR_HEADER_BLOCK_SIZE];
103     };
104
105     size_t bytesRemaining;
106     std::auto_ptr<SGFile> currentFile;
107     size_t currentFileSize;
108     z_stream zlibStream;
109     uint8_t* zlibOutput;
110     bool haveInitedZLib;
111     uint8_t* headerPtr;
112
113     TarExtractorPrivate() :
114         haveInitedZLib(false)
115     {
116     }
117
118     ~TarExtractorPrivate()
119     {
120         free(zlibOutput);
121     }
122
123     void checkEndOfState()
124     {
125         if (bytesRemaining > 0) {
126             return;
127         }
128
129         if (state == READING_FILE) {
130             currentFile->close();
131             size_t pad = currentFileSize % TAR_HEADER_BLOCK_SIZE;
132             if (pad) {
133                 bytesRemaining = TAR_HEADER_BLOCK_SIZE - pad;
134                 setState(READING_PADDING);
135             } else {
136                 setState(READING_HEADER);
137             }
138         } else if (state == READING_HEADER) {
139             processHeader();
140         } else if (state == PRE_END_OF_ARCHVE) {
141             if (headerIsAllZeros()) {
142                 setState(END_OF_ARCHIVE);
143             } else {
144                 // what does the spec say here?
145             }
146         } else if (state == READING_PADDING) {
147             setState(READING_HEADER);
148         }
149     }
150
151     void setState(State newState)
152     {
153         if ((newState == READING_HEADER) || (newState == PRE_END_OF_ARCHVE)) {
154             bytesRemaining = TAR_HEADER_BLOCK_SIZE;
155             headerPtr = headerBytes;
156         }
157
158         state = newState;
159     }
160
161     void processHeader()
162     {
163         if (headerIsAllZeros()) {
164             if (state == PRE_END_OF_ARCHVE) {
165                 setState(END_OF_ARCHIVE);
166             } else {
167                 setState(PRE_END_OF_ARCHVE);
168             }
169             return;
170         }
171
172         if (strncmp(header.magic, TMAGIC, TMAGLEN) != 0) {
173             SG_LOG(SG_IO, SG_WARN, "magic is wrong");
174             state = BAD_ARCHIVE;
175             return;
176         }
177
178         std::string tarPath = std::string(header.prefix) + std::string(header.fileName);
179
180         if (!isSafePath(tarPath)) {
181             //state = BAD_ARCHIVE;
182             SG_LOG(SG_IO, SG_WARN, "bad tar path:" << tarPath);
183             //return;
184         }
185
186         SGPath p = path;
187         p.append(tarPath);
188
189         if (header.typeflag == DIRTYPE) {
190             Dir dir(p);
191             dir.create(0755);
192             setState(READING_HEADER);
193         } else if ((header.typeflag == REGTYPE) || (header.typeflag == AREGTYPE)) {
194             currentFileSize = ::strtol(header.size, NULL, 8);
195             bytesRemaining = currentFileSize;
196             currentFile.reset(new SGBinaryFile(p.str()));
197             currentFile->open(SG_IO_OUT);
198             setState(READING_FILE);
199         } else {
200             SG_LOG(SG_IO, SG_WARN, "Unsupported tar file type:" << header.typeflag);
201             state = BAD_ARCHIVE;
202         }
203     }
204
205     void processBytes(const char* bytes, size_t count)
206     {
207         if ((state >= ERROR_STATE) || (state == END_OF_ARCHIVE)) {
208             return;
209         }
210
211         size_t curBytes = std::min(bytesRemaining, count);
212         if (state == READING_FILE) {
213             currentFile->write(bytes, curBytes);
214             bytesRemaining -= curBytes;
215         } else if ((state == READING_HEADER) || (state == PRE_END_OF_ARCHVE) || (state == END_OF_ARCHIVE)) {
216             memcpy(headerPtr, bytes, curBytes);
217             bytesRemaining -= curBytes;
218             headerPtr += curBytes;
219         } else if (state == READING_PADDING) {
220             bytesRemaining -= curBytes;
221         }
222
223         checkEndOfState();
224         if (count > curBytes) {
225             // recurse with unprocessed bytes
226             processBytes(bytes + curBytes, count - curBytes);
227         }
228     }
229
230     bool headerIsAllZeros() const
231     {
232         char* headerAsChar = (char*) &header;
233         for (size_t i=0; i < offsetof(UstarHeaderBlock, magic); ++i) {
234             if (*headerAsChar++ != 0) {
235                 return false;
236             }
237         }
238
239         return true;
240     }
241
242     bool isSafePath(const std::string& p) const
243     {
244         if (p.empty()) {
245             return false;
246         }
247
248         // reject absolute paths
249         if (p.at(0) == '/') {
250             return false;
251         }
252
253         // reject paths containing '..'
254         size_t doubleDot = p.find("..");
255         if (doubleDot != std::string::npos) {
256             return false;
257         }
258
259         // on POSIX could use realpath to sanity check
260         return true;
261     }
262 };
263
264 TarExtractor::TarExtractor(const SGPath& rootPath) :
265     d(new TarExtractorPrivate)
266 {
267
268     d->path = rootPath;
269     d->state = TarExtractorPrivate::INVALID;
270
271     memset(&d->zlibStream, 0, sizeof(z_stream));
272     d->zlibOutput = (unsigned char*) malloc(ZLIB_DECOMPRESS_BUFFER_SIZE);
273     d->zlibStream.zalloc = Z_NULL;
274     d->zlibStream.zfree = Z_NULL;
275
276     d->zlibStream.avail_out = ZLIB_DECOMPRESS_BUFFER_SIZE;
277     d->zlibStream.next_out = d->zlibOutput;
278 }
279
280 void TarExtractor::extractBytes(const char* bytes, size_t count)
281 {
282     if (d->state >= TarExtractorPrivate::ERROR_STATE) {
283         return;
284     }
285
286     d->zlibStream.next_in = (uint8_t*) bytes;
287     d->zlibStream.avail_in = count;
288
289     if (!d->haveInitedZLib) {
290         if (inflateInit2(&d->zlibStream, ZLIB_INFLATE_WINDOW_BITS | ZLIB_DECODE_GZIP_HEADER) != Z_OK) {
291             SG_LOG(SG_IO, SG_WARN, "inflateInit2 failed");
292             d->state = TarExtractorPrivate::BAD_DATA;
293             return;
294         } else {
295             d->haveInitedZLib = true;
296             d->setState(TarExtractorPrivate::READING_HEADER);
297         }
298     }
299     
300     size_t writtenSize;
301
302     // loop, running zlib() inflate and sending output bytes to
303     // our request body handler. Keep calling inflate until no bytes are
304     // written, and ZLIB has consumed all available input
305     do {
306         d->zlibStream.next_out = d->zlibOutput;
307         d->zlibStream.avail_out = ZLIB_DECOMPRESS_BUFFER_SIZE;
308         int result = inflate(&d->zlibStream, Z_NO_FLUSH);
309         if (result == Z_OK || result == Z_STREAM_END) {
310             // nothing to do
311
312         } else if (result == Z_BUF_ERROR) {
313             // transient error, fall through
314         } else {
315             //  _error = result;
316             SG_LOG(SG_IO, SG_WARN, "Permanent ZLib error:" << d->zlibStream.msg);
317             d->state = TarExtractorPrivate::BAD_DATA;
318             return;
319         }
320
321         writtenSize = ZLIB_DECOMPRESS_BUFFER_SIZE - d->zlibStream.avail_out;
322         if (writtenSize > 0) {
323             d->processBytes((const char*) d->zlibOutput, writtenSize);
324         }
325
326         if (result == Z_STREAM_END) {
327             break;
328         }
329     } while ((d->zlibStream.avail_in > 0) || (writtenSize > 0));
330 }
331
332 bool TarExtractor::isAtEndOfArchive() const
333 {
334     return (d->state == TarExtractorPrivate::END_OF_ARCHIVE);
335 }
336
337 bool TarExtractor::hasError() const
338 {
339     return (d->state >= TarExtractorPrivate::ERROR_STATE);
340 }
341
342 } // of pkg
343
344 } // of simgear