]> git.mxchange.org Git - simgear.git/blob - simgear/io/HTTPRepository.cxx
AbstractRepository interface removed.
[simgear.git] / simgear / io / HTTPRepository.cxx
1 // HTTPRepository.cxx -- plain HTTP TerraSync remote client
2 //
3 // Copyright (C) 20126  James Turner <zakalawe@mac.com>
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 2 of the
8 // License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
18
19 #include "HTTPRepository.hxx"
20
21 #include <simgear_config.h>
22
23 #include <iostream>
24 #include <cassert>
25 #include <algorithm>
26 #include <sstream>
27 #include <map>
28 #include <set>
29 #include <fstream>
30 #include <limits>
31 #include <cstdlib>
32
33 #include <fcntl.h>
34
35 #include "simgear/debug/logstream.hxx"
36 #include "simgear/misc/strutils.hxx"
37 #include <simgear/misc/sg_dir.hxx>
38 #include <simgear/io/HTTPClient.hxx>
39 #include <simgear/io/sg_file.hxx>
40 #include <simgear/misc/sgstream.hxx>
41 #include <simgear/structure/exception.hxx>
42 #include <simgear/timing/timestamp.hxx>
43
44 #include <simgear/misc/sg_hash.hxx>
45
46 namespace simgear
47 {
48
49     class HTTPDirectory;
50
51     class HTTPRepoGetRequest : public HTTP::Request
52     {
53     public:
54         HTTPRepoGetRequest(HTTPDirectory* d, const std::string& u) :
55             HTTP::Request(u),
56             _directory(d)
57         {
58         }
59
60         virtual void cancel();
61
62         size_t contentSize() const
63         {
64             return _contentSize;
65         }
66
67         void setContentSize(size_t sz)
68         {
69             _contentSize = sz;
70         }
71     protected:
72         HTTPDirectory* _directory;
73         size_t _contentSize;
74     };
75
76     typedef SGSharedPtr<HTTPRepoGetRequest> RepoRequestPtr;
77
78 class HTTPRepoPrivate
79 {
80 public:
81     struct HashCacheEntry
82     {
83         std::string filePath;
84         time_t modTime;
85         size_t lengthBytes;
86         std::string hashHex;
87
88     };
89
90     typedef std::vector<HashCacheEntry> HashCache;
91     HashCache hashes;
92     bool hashCacheDirty;
93
94     struct Failure
95     {
96         SGPath path;
97         HTTPRepository::ResultCode error;
98     };
99
100     typedef std::vector<Failure> FailureList;
101     FailureList failures;
102
103     HTTPRepoPrivate(HTTPRepository* parent) :
104         hashCacheDirty(false),
105         p(parent),
106         isUpdating(false),
107         status(HTTPRepository::REPO_NO_ERROR),
108         totalDownloaded(0)
109     { ; }
110
111     ~HTTPRepoPrivate();
112
113     HTTPRepository* p; // link back to outer
114     HTTP::Client* http;
115     std::string baseUrl;
116     SGPath basePath;
117     bool isUpdating;
118     HTTPRepository::ResultCode status;
119     HTTPDirectory* rootDir;
120     size_t totalDownloaded;
121
122     HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name,
123                                  size_t sz);
124     HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash,
125                                 size_t sz);
126
127     std::string hashForPath(const SGPath& p);
128     void updatedFileContents(const SGPath& p, const std::string& newHash);
129     void parseHashCache();
130     std::string computeHashForPath(const SGPath& p);
131     void writeHashCache();
132
133     void failedToGetRootIndex(HTTPRepository::ResultCode st);
134     void failedToUpdateChild(const SGPath& relativePath,
135                              HTTPRepository::ResultCode fileStatus);
136
137     typedef std::vector<RepoRequestPtr> RequestVector;
138     RequestVector queuedRequests,
139         activeRequests;
140
141     void makeRequest(RepoRequestPtr req);
142     void finishedRequest(const RepoRequestPtr& req);
143
144     HTTPDirectory* getOrCreateDirectory(const std::string& path);
145     bool deleteDirectory(const std::string& path);
146
147     typedef std::vector<HTTPDirectory*> DirectoryVector;
148     DirectoryVector directories;
149
150 };
151
152 class HTTPDirectory
153 {
154     struct ChildInfo
155     {
156         enum Type
157         {
158             FileType,
159             DirectoryType
160         };
161
162         ChildInfo(Type ty, const std::string & nameData, const std::string & hashData) :
163             type(ty),
164             name(nameData),
165             hash(hashData),
166             sizeInBytes(0)
167         {
168         }
169
170         ChildInfo(const ChildInfo& other) :
171             type(other.type),
172             name(other.name),
173             hash(other.hash),
174             sizeInBytes(other.sizeInBytes)
175         { }
176
177         void setSize(const std::string & sizeData)
178         {
179             sizeInBytes = ::strtol(sizeData.c_str(), NULL, 10);
180         }
181
182         bool operator<(const ChildInfo& other) const
183         {
184             return name < other.name;
185         }
186
187         Type type;
188         std::string name, hash;
189         size_t sizeInBytes;
190     };
191
192     typedef std::vector<ChildInfo> ChildInfoList;
193     ChildInfoList children;
194
195 public:
196     HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
197         _repository(repo),
198         _relativePath(path)
199   {
200       assert(repo);
201
202       SGPath p(absolutePath());
203       if (p.exists()) {
204           try {
205               // already exists on disk
206               parseDirIndex(children);
207               std::sort(children.begin(), children.end());
208           } catch (sg_exception& ) {
209               // parsing cache failed
210               children.clear();
211           }
212       }
213   }
214
215     HTTPRepoPrivate* repository() const
216     {
217         return _repository;
218     }
219
220     std::string url() const
221     {
222         if (_relativePath.str().empty()) {
223             return _repository->baseUrl;
224         }
225
226         return _repository->baseUrl + "/" + _relativePath.str();
227     }
228
229     void dirIndexUpdated(const std::string& hash)
230     {
231         SGPath fpath(_relativePath);
232         fpath.append(".dirindex");
233         _repository->updatedFileContents(fpath, hash);
234
235         children.clear();
236         parseDirIndex(children);
237         std::sort(children.begin(), children.end());
238     }
239
240     void failedToUpdate(HTTPRepository::ResultCode status)
241     {
242         if (_relativePath.isNull()) {
243             // root dir failed
244             _repository->failedToGetRootIndex(status);
245         } else {
246             _repository->failedToUpdateChild(_relativePath, status);
247         }
248     }
249
250     void updateChildrenBasedOnHash()
251     {
252         //SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
253
254         string_list indexNames = indexChildren(),
255             toBeUpdated, orphans;
256         simgear::Dir d(absolutePath());
257         PathList fsChildren = d.children(0);
258         PathList::const_iterator it = fsChildren.begin();
259
260
261         for (; it != fsChildren.end(); ++it) {
262             ChildInfo info(it->isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
263                            it->file(), "");
264             std::string hash = hashForChild(info);
265
266             ChildInfoList::iterator c = findIndexChild(it->file());
267             if (c == children.end()) {
268                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "is orphan '" << it->file() << "'" );
269                 orphans.push_back(it->file());
270             } else if (c->hash != hash) {
271                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "hash mismatch'" << it->file() );
272                 // file exists, but hash mismatch, schedule update
273                 if (!hash.empty()) {
274                     SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists but hash is wrong for:" << it->file() );
275                     SG_LOG(SG_TERRASYNC, SG_DEBUG, "on disk:" << hash << " vs in info:" << c->hash);
276                 }
277
278                 toBeUpdated.push_back(it->file() );
279             } else {
280                 // file exists and hash is valid. If it's a directory,
281                 // perform a recursive check.
282                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists hash is good:" << it->file() );
283                 if (c->type == ChildInfo::DirectoryType) {
284                     SGPath p(relativePath());
285                     p.append(it->file());
286                     HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
287                     childDir->updateChildrenBasedOnHash();
288                 }
289             }
290
291             // remove existing file system children from the index list,
292             // so we can detect new children
293             // https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Erase-Remove
294             indexNames.erase(std::remove(indexNames.begin(), indexNames.end(), it->file()), indexNames.end());
295         } // of real children iteration
296
297         // all remaining names in indexChilden are new children
298         toBeUpdated.insert(toBeUpdated.end(), indexNames.begin(), indexNames.end());
299
300         removeOrphans(orphans);
301         scheduleUpdates(toBeUpdated);
302     }
303
304     void removeOrphans(const string_list& orphans)
305     {
306         string_list::const_iterator it;
307         for (it = orphans.begin(); it != orphans.end(); ++it) {
308             removeChild(*it);
309         }
310     }
311
312     string_list indexChildren() const
313     {
314         string_list r;
315         r.reserve(children.size());
316         ChildInfoList::const_iterator it;
317         for (it=children.begin(); it != children.end(); ++it) {
318             r.push_back(it->name);
319         }
320         return r;
321     }
322
323     void scheduleUpdates(const string_list& names)
324     {
325         string_list::const_iterator it;
326         for (it = names.begin(); it != names.end(); ++it) {
327             ChildInfoList::iterator cit = findIndexChild(*it);
328             if (cit == children.end()) {
329                 SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, unknown child:" << *it);
330                 continue;
331             }
332
333             SG_LOG(SG_TERRASYNC,SG_DEBUG, "scheduling update for " << *it );
334             if (cit->type == ChildInfo::FileType) {
335                 _repository->updateFile(this, *it, cit->sizeInBytes);
336             } else {
337                 SGPath p(relativePath());
338                 p.append(*it);
339                 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
340                 _repository->updateDir(childDir, cit->hash, cit->sizeInBytes);
341             }
342         }
343     }
344
345     SGPath absolutePath() const
346     {
347         SGPath r(_repository->basePath);
348         r.append(_relativePath.str());
349         return r;
350     }
351
352     SGPath relativePath() const
353     {
354         return _relativePath;
355     }
356
357     void didUpdateFile(const std::string& file, const std::string& hash, size_t sz)
358     {
359         // check hash matches what we expected
360         ChildInfoList::iterator it = findIndexChild(file);
361         if (it == children.end()) {
362             SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
363         } else {
364             SGPath fpath(_relativePath);
365             fpath.append(file);
366
367             if (it->hash != hash) {
368                 _repository->failedToUpdateChild(_relativePath, HTTPRepository::REPO_ERROR_CHECKSUM);
369             } else {
370                 _repository->updatedFileContents(fpath, hash);
371                 _repository->totalDownloaded += sz;
372                 //SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath);
373             } // of hash matches
374         } // of found in child list
375     }
376
377     void didFailToUpdateFile(const std::string& file,
378                              HTTPRepository::ResultCode status)
379     {
380         SGPath fpath(_relativePath);
381         fpath.append(file);
382         _repository->failedToUpdateChild(fpath, status);
383     }
384 private:
385
386     struct ChildWithName
387     {
388         ChildWithName(const std::string& n) : name(n) {}
389         std::string name;
390
391         bool operator()(const ChildInfo& info) const
392         { return info.name == name; }
393     };
394
395     ChildInfoList::iterator findIndexChild(const std::string& name)
396     {
397         return std::find_if(children.begin(), children.end(), ChildWithName(name));
398     }
399
400     bool parseDirIndex(ChildInfoList& children)
401     {
402         SGPath p(absolutePath());
403         p.append(".dirindex");
404         if (!p.exists()) {
405             return false;
406         }
407
408         std::ifstream indexStream( p.c_str(), std::ios::in );
409
410         if ( !indexStream.is_open() ) {
411             throw sg_io_exception("cannot open dirIndex file", p);
412         }
413
414         while (!indexStream.eof() ) {
415             std::string line;
416             std::getline( indexStream, line );
417             line = simgear::strutils::strip(line);
418
419             // skip blank line or comment beginning with '#'
420             if( line.empty() || line[0] == '#' )
421                 continue;
422
423             string_list tokens = simgear::strutils::split( line, ":" );
424
425             std::string typeData = tokens[0];
426
427             if( typeData == "version" ) {
428                 if( tokens.size() < 2 ) {
429                     SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: missing version number in line '" << line << "'" );
430                     break;
431                 }
432                 if( tokens[1] != "1" ) {
433                     SG_LOG(SG_TERRASYNC, SG_WARN, "invalid .dirindex file: wrong version number '" << tokens[1] << "' (expected 1)" );
434                     break;
435                 }
436                 continue; // version is good, continue
437             }
438
439             if( typeData == "path" ) {
440                 continue; // ignore path, next line
441             }
442
443             if( tokens.size() < 3 ) {
444                 SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: not enough tokens in line '" << line << "' (ignoring line)" );
445                 continue;
446             }
447
448             if (typeData != "f" && typeData != "d" ) {
449                 SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 'd' or 'f', (ignoring line)" );
450                 continue;
451             }
452             children.push_back(ChildInfo(typeData == "f" ? ChildInfo::FileType : ChildInfo::DirectoryType, tokens[1], tokens[2]));
453
454             if (tokens.size() > 3) {
455                 children.back().setSize(tokens[3]);
456             }
457         }
458
459         return true;
460     }
461
462     void removeChild(const std::string& name)
463     {
464         SGPath p(absolutePath());
465         p.append(name);
466         bool ok;
467
468         SGPath fpath(_relativePath);
469         fpath.append(name);
470
471         if (p.isDir()) {
472             ok = _repository->deleteDirectory(fpath.str());
473         } else {
474             // remove the hash cache entry
475             _repository->updatedFileContents(fpath, std::string());
476             ok = p.remove();
477         }
478
479         if (!ok) {
480             SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << p);
481             throw sg_io_exception("Failed to remove existing file/dir:", p);
482         }
483     }
484
485     std::string hashForChild(const ChildInfo& child) const
486     {
487         SGPath p(absolutePath());
488         p.append(child.name);
489         if (child.type == ChildInfo::DirectoryType) {
490             p.append(".dirindex");
491         }
492         return _repository->hashForPath(p);
493     }
494
495   HTTPRepoPrivate* _repository;
496   SGPath _relativePath; // in URL and file-system space
497
498
499 };
500
501 HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
502     _d(new HTTPRepoPrivate(this))
503 {
504     _d->http = cl;
505     _d->basePath = base;
506     _d->rootDir = new HTTPDirectory(_d.get(), "");
507     _d->parseHashCache();
508 }
509
510 HTTPRepository::~HTTPRepository()
511 {
512 }
513
514 void HTTPRepository::setBaseUrl(const std::string &url)
515 {
516   _d->baseUrl = url;
517 }
518
519 std::string HTTPRepository::baseUrl() const
520 {
521   return _d->baseUrl;
522 }
523
524 HTTP::Client* HTTPRepository::http() const
525 {
526   return _d->http;
527 }
528
529 SGPath HTTPRepository::fsBase() const
530 {
531   return SGPath();
532 }
533
534 void HTTPRepository::update()
535 {
536     if (_d->isUpdating) {
537         return;
538     }
539
540     _d->status = REPO_NO_ERROR;
541     _d->isUpdating = true;
542     _d->failures.clear();
543     _d->updateDir(_d->rootDir, std::string(), 0);
544 }
545
546 bool HTTPRepository::isDoingSync() const
547 {
548     if (_d->status != REPO_NO_ERROR) {
549         return false;
550     }
551
552     return _d->isUpdating;
553 }
554
555 size_t HTTPRepository::bytesToDownload() const
556 {
557     size_t result = 0;
558
559     HTTPRepoPrivate::RequestVector::const_iterator r;
560     for (r = _d->queuedRequests.begin(); r != _d->queuedRequests.end(); ++r) {
561         result += (*r)->contentSize();
562     }
563
564     for (r = _d->activeRequests.begin(); r != _d->activeRequests.end(); ++r) {
565         result += (*r)->contentSize() - (*r)->responseBytesReceived();
566     }
567
568     return result;
569 }
570
571 size_t HTTPRepository::bytesDownloaded() const
572 {
573     size_t result = _d->totalDownloaded;
574
575     HTTPRepoPrivate::RequestVector::const_iterator r;
576     for (r = _d->activeRequests.begin(); r != _d->activeRequests.end(); ++r) {
577         result += (*r)->responseBytesReceived();
578     }
579
580     return result;
581 }
582
583 HTTPRepository::ResultCode
584 HTTPRepository::failure() const
585 {
586     if ((_d->status == REPO_NO_ERROR) && !_d->failures.empty()) {
587         return REPO_PARTIAL_UPDATE;
588     }
589
590     return _d->status;
591 }
592
593     void HTTPRepoGetRequest::cancel()
594     {
595         _directory->repository()->http->cancelRequest(this, "Reposiotry cancelled");
596         _directory = 0;
597     }
598
599     class FileGetRequest : public HTTPRepoGetRequest
600     {
601     public:
602         FileGetRequest(HTTPDirectory* d, const std::string& file) :
603             HTTPRepoGetRequest(d, makeUrl(d, file)),
604             fileName(file)
605         {
606             pathInRepo = _directory->absolutePath();
607             pathInRepo.append(fileName);
608             //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url());
609         }
610
611     protected:
612         virtual void gotBodyData(const char* s, int n)
613         {
614             if (!file.get()) {
615                 file.reset(new SGBinaryFile(pathInRepo.str()));
616                 if (!file->open(SG_IO_OUT)) {
617                   SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo);
618                   _directory->repository()->http->cancelRequest(this, "Unable to create output file");
619                 }
620
621                 sha1_init(&hashContext);
622             }
623
624             sha1_write(&hashContext, s, n);
625             file->write(s, n);
626         }
627
628         virtual void onDone()
629         {
630             file->close();
631             if (responseCode() == 200) {
632                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
633                 _directory->didUpdateFile(fileName, hash, contentSize());
634                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "got file " << fileName << " in " << _directory->absolutePath());
635             } else if (responseCode() == 404) {
636                 SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file not found on server: " << fileName << " for " << _directory->absolutePath());
637                 _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
638             } else {
639                 SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file download error on server: " << fileName << " for " << _directory->absolutePath() << ": " << responseCode() );
640                 _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_HTTP);
641             }
642
643             _directory->repository()->finishedRequest(this);
644         }
645
646         virtual void onFail()
647         {
648             file.reset();
649             if (pathInRepo.exists()) {
650                 pathInRepo.remove();
651             }
652             
653             if (_directory) {
654                 _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_SOCKET);
655                 _directory->repository()->finishedRequest(this);
656             }
657         }
658     private:
659         static std::string makeUrl(HTTPDirectory* d, const std::string& file)
660         {
661             return d->url() + "/" + file;
662         }
663
664         std::string fileName; // if empty, we're getting the directory itself
665         SGPath pathInRepo;
666         simgear::sha1nfo hashContext;
667         std::auto_ptr<SGBinaryFile> file;
668     };
669
670     class DirGetRequest : public HTTPRepoGetRequest
671     {
672     public:
673         DirGetRequest(HTTPDirectory* d, const std::string& targetHash) :
674             HTTPRepoGetRequest(d, makeUrl(d)),
675             _isRootDir(false),
676             _targetHash(targetHash)
677         {
678             sha1_init(&hashContext);
679            //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url());
680         }
681
682         void setIsRootDir()
683         {
684             _isRootDir = true;
685         }
686
687         bool isRootDir() const
688         {
689             return _isRootDir;
690         }
691
692     protected:
693         virtual void gotBodyData(const char* s, int n)
694         {
695             body += std::string(s, n);
696             sha1_write(&hashContext, s, n);
697         }
698
699         virtual void onDone()
700         {
701             if (responseCode() == 200) {
702                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
703                 if (!_targetHash.empty() && (hash != _targetHash)) {
704                     _directory->failedToUpdate(HTTPRepository::REPO_ERROR_CHECKSUM);
705                     _directory->repository()->finishedRequest(this);
706                     return;
707                 }
708
709                 std::string curHash = _directory->repository()->hashForPath(path());
710                 if (hash != curHash) {
711                     simgear::Dir d(_directory->absolutePath());
712                     if (!d.exists()) {
713                         if (!d.create(0700)) {
714                             throw sg_io_exception("Unable to create directory", d.path());
715                         }
716                     }
717
718                     // dir index data has changed, so write to disk and update
719                     // the hash accordingly
720                     std::ofstream of(pathInRepo().c_str(), std::ios::trunc | std::ios::out);
721                     if (!of.is_open()) {
722                         throw sg_io_exception("Failed to open directory index file for writing", pathInRepo().c_str());
723                     }
724
725                     of.write(body.data(), body.size());
726                     of.close();
727                     _directory->dirIndexUpdated(hash);
728
729                     //SG_LOG(SG_TERRASYNC, SG_INFO, "updated dir index " << _directory->absolutePath());
730                 }
731
732                 _directory->repository()->totalDownloaded += contentSize();
733
734                 try {
735                     // either way we've confirmed the index is valid so update
736                     // children now
737                     SGTimeStamp st;
738                     st.stamp();
739                     _directory->updateChildrenBasedOnHash();
740                     SG_LOG(SG_TERRASYNC, SG_INFO, "after update of:" << _directory->absolutePath() << " child update took:" << st.elapsedMSec());
741                 } catch (sg_exception& ) {
742                     _directory->failedToUpdate(HTTPRepository::REPO_ERROR_IO);
743                 }
744             } else if (responseCode() == 404) {
745                 _directory->failedToUpdate(HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
746             } else {
747                 _directory->failedToUpdate(HTTPRepository::REPO_ERROR_HTTP);
748             }
749
750             _directory->repository()->finishedRequest(this);
751         }
752
753         virtual void onFail()
754         {
755             if (_directory) {
756                 _directory->failedToUpdate(HTTPRepository::REPO_ERROR_SOCKET);
757                 _directory->repository()->finishedRequest(this);
758             }
759         }
760     private:
761         static std::string makeUrl(HTTPDirectory* d)
762         {
763             return d->url() + "/.dirindex";
764         }
765
766         SGPath pathInRepo() const
767         {
768             SGPath p(_directory->absolutePath());
769             p.append(".dirindex");
770             return p;
771         }
772
773         simgear::sha1nfo hashContext;
774         std::string body;
775         bool _isRootDir; ///< is this the repository root?
776         std::string _targetHash;
777     };
778
779     HTTPRepoPrivate::~HTTPRepoPrivate()
780     {
781         DirectoryVector::iterator it;
782         for (it=directories.begin(); it != directories.end(); ++it) {
783             delete *it;
784         }
785
786         RequestVector::iterator r;
787         for (r=activeRequests.begin(); r != activeRequests.end(); ++r) {
788             (*r)->cancel();
789         }
790     }
791
792     HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name, size_t sz)
793     {
794         RepoRequestPtr r(new FileGetRequest(dir, name));
795         r->setContentSize(sz);
796         makeRequest(r);
797         return r;
798     }
799
800     HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash, size_t sz)
801     {
802         RepoRequestPtr r(new DirGetRequest(dir, hash));
803         r->setContentSize(sz);
804         makeRequest(r);
805         return r;
806     }
807
808
809     class HashEntryWithPath
810     {
811     public:
812         HashEntryWithPath(const std::string& p) : path(p) {}
813         bool operator()(const HTTPRepoPrivate::HashCacheEntry& entry) const
814         { return entry.filePath == path; }
815     private:
816         std::string path;
817     };
818
819     std::string HTTPRepoPrivate::hashForPath(const SGPath& p)
820     {
821         HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
822         if (it != hashes.end()) {
823             // ensure data on disk hasn't changed.
824             // we could also use the file type here if we were paranoid
825             if ((p.sizeInBytes() == it->lengthBytes) && (p.modTime() == it->modTime)) {
826                 return it->hashHex;
827             }
828
829             // entry in the cache, but it's stale so remove and fall through
830             hashes.erase(it);
831         }
832
833         std::string hash = computeHashForPath(p);
834         updatedFileContents(p, hash);
835         return hash;
836     }
837
838     std::string HTTPRepoPrivate::computeHashForPath(const SGPath& p)
839     {
840         if (!p.exists())
841             return std::string();
842         sha1nfo info;
843         sha1_init(&info);
844         char* buf = static_cast<char*>(malloc(1024 * 1024));
845         size_t readLen;
846         SGBinaryFile f(p.str());
847         if (!f.open(SG_IO_IN)) {
848             throw sg_io_exception("Couldn't open file for compute hash", p);
849         }
850         while ((readLen = f.read(buf, 1024 * 1024)) > 0) {
851             sha1_write(&info, buf, readLen);
852         }
853
854         f.close();
855         free(buf);
856         std::string hashBytes((char*) sha1_result(&info), HASH_LENGTH);
857         return strutils::encodeHex(hashBytes);
858     }
859
860     void HTTPRepoPrivate::updatedFileContents(const SGPath& p, const std::string& newHash)
861     {
862         // remove the existing entry
863         HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
864         if (it != hashes.end()) {
865             hashes.erase(it);
866             hashCacheDirty = true;
867         }
868
869         if (newHash.empty()) {
870             return; // we're done
871         }
872
873         // use a cloned SGPath and reset its caching to force one stat() call
874         SGPath p2(p);
875         p2.set_cached(false);
876         p2.set_cached(true);
877
878         HashCacheEntry entry;
879         entry.filePath = p.str();
880         entry.hashHex = newHash;
881         entry.modTime = p2.modTime();
882         entry.lengthBytes = p2.sizeInBytes();
883         hashes.push_back(entry);
884
885         hashCacheDirty = true;
886     }
887
888     void HTTPRepoPrivate::writeHashCache()
889     {
890         if (!hashCacheDirty) {
891             return;
892         }
893
894         SGPath cachePath = basePath;
895         cachePath.append(".hashes");
896
897         std::ofstream stream(cachePath.c_str(),std::ios::out | std::ios::trunc);
898         HashCache::const_iterator it;
899         for (it = hashes.begin(); it != hashes.end(); ++it) {
900             stream << it->filePath << ":" << it->modTime << ":"
901             << it->lengthBytes << ":" << it->hashHex << "\n";
902         }
903         stream.close();
904         hashCacheDirty = false;
905     }
906
907     void HTTPRepoPrivate::parseHashCache()
908     {
909         hashes.clear();
910         SGPath cachePath = basePath;
911         cachePath.append(".hashes");
912         if (!cachePath.exists()) {
913             return;
914         }
915
916         std::ifstream stream(cachePath.c_str(), std::ios::in);
917
918         while (!stream.eof()) {
919             std::string line;
920             std::getline(stream,line);
921             line = simgear::strutils::strip(line);
922             if( line.empty() || line[0] == '#' )
923                 continue;
924
925             string_list tokens = simgear::strutils::split( line, ":" );
926             if( tokens.size() < 4 ) {
927                 SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
928                 continue;
929             }
930             const std::string nameData = simgear::strutils::strip(tokens[0]);
931             const std::string timeData = simgear::strutils::strip(tokens[1]);
932             const std::string sizeData = simgear::strutils::strip(tokens[2]);
933             const std::string hashData = simgear::strutils::strip(tokens[3]);
934
935             if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty() ) {
936                 SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
937                 continue;
938             }
939
940             HashCacheEntry entry;
941             entry.filePath = nameData;
942             entry.hashHex = hashData;
943             entry.modTime = strtol(timeData.c_str(), NULL, 10);
944             entry.lengthBytes = strtol(sizeData.c_str(), NULL, 10);
945             hashes.push_back(entry);
946         }
947     }
948
949     class DirectoryWithPath
950     {
951     public:
952         DirectoryWithPath(const std::string& p) : path(p) {}
953         bool operator()(const HTTPDirectory* entry) const
954         { return entry->relativePath().str() == path; }
955     private:
956         std::string path;
957     };
958
959     HTTPDirectory* HTTPRepoPrivate::getOrCreateDirectory(const std::string& path)
960     {
961         DirectoryWithPath p(path);
962         DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
963         if (it != directories.end()) {
964             return *it;
965         }
966
967         HTTPDirectory* d = new HTTPDirectory(this, path);
968         directories.push_back(d);
969         return d;
970     }
971
972     bool HTTPRepoPrivate::deleteDirectory(const std::string& path)
973     {
974         DirectoryWithPath p(path);
975         DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
976         if (it != directories.end()) {
977             HTTPDirectory* d = *it;
978             directories.erase(it);
979             Dir dir(d->absolutePath());
980             bool result = dir.remove(true);
981             delete d;
982
983             // update the hash cache too
984             updatedFileContents(path, std::string());
985
986             return result;
987         }
988
989         return false;
990     }
991
992     void HTTPRepoPrivate::makeRequest(RepoRequestPtr req)
993     {
994         if (activeRequests.size() > 4) {
995             queuedRequests.push_back(req);
996         } else {
997             activeRequests.push_back(req);
998             http->makeRequest(req);
999         }
1000     }
1001
1002     void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req)
1003     {
1004         RequestVector::iterator it = std::find(activeRequests.begin(), activeRequests.end(), req);
1005         if (it == activeRequests.end()) {
1006             throw sg_exception("lost request somehow", req->url());
1007         }
1008         activeRequests.erase(it);
1009
1010         if (!queuedRequests.empty()) {
1011             RepoRequestPtr rr = queuedRequests.front();
1012             queuedRequests.erase(queuedRequests.begin());
1013             activeRequests.push_back(rr);
1014             http->makeRequest(rr);
1015         }
1016
1017         writeHashCache();
1018
1019         if (activeRequests.empty() && queuedRequests.empty()) {
1020             isUpdating = false;
1021         }
1022     }
1023
1024     void HTTPRepoPrivate::failedToGetRootIndex(HTTPRepository::ResultCode st)
1025     {
1026         SG_LOG(SG_TERRASYNC, SG_WARN, "Failed to get root of repo:" << baseUrl);
1027         status = st;
1028     }
1029
1030     void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath,
1031                                               HTTPRepository::ResultCode fileStatus)
1032     {
1033         Failure f;
1034         f.path = relativePath;
1035         f.error = fileStatus;
1036         failures.push_back(f);
1037
1038         SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " code:" << fileStatus);
1039     }
1040
1041
1042
1043 } // of namespace simgear