]> git.mxchange.org Git - simgear.git/blob - simgear/io/HTTPRepository.cxx
HTTP: Always use absolute paths for hashes
[simgear.git] / simgear / io / HTTPRepository.cxx
1 // HTTPRepository.cxx -- plain HTTP TerraSync remote client
2 //
3 // Copyright (C) 20126  James Turner <zakalawe@mac.com>
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 2 of the
8 // License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
18
19 #include "HTTPRepository.hxx"
20
21 #include <simgear_config.h>
22
23 #include <iostream>
24 #include <cassert>
25 #include <algorithm>
26 #include <sstream>
27 #include <map>
28 #include <set>
29 #include <fstream>
30 #include <limits>
31 #include <cstdlib>
32
33 #include <fcntl.h>
34
35 #include "simgear/debug/logstream.hxx"
36 #include "simgear/misc/strutils.hxx"
37 #include <simgear/misc/sg_dir.hxx>
38 #include <simgear/io/HTTPClient.hxx>
39 #include <simgear/io/sg_file.hxx>
40 #include <simgear/misc/sgstream.hxx>
41 #include <simgear/structure/exception.hxx>
42 #include <simgear/timing/timestamp.hxx>
43
44 #include <simgear/misc/sg_hash.hxx>
45
46 namespace simgear
47 {
48
49     class HTTPDirectory;
50
51     class HTTPRepoGetRequest : public HTTP::Request
52     {
53     public:
54         HTTPRepoGetRequest(HTTPDirectory* d, const std::string& u) :
55             HTTP::Request(u),
56             _directory(d)
57         {
58         }
59
60         size_t contentSize() const
61         {
62             return _contentSize;
63         }
64
65         void setContentSize(size_t sz)
66         {
67             _contentSize = sz;
68         }
69     protected:
70         HTTPDirectory* _directory;
71         size_t _contentSize;
72     };
73
74     typedef SGSharedPtr<HTTPRepoGetRequest> RepoRequestPtr;
75
76 class HTTPRepoPrivate
77 {
78 public:
79     struct HashCacheEntry
80     {
81         std::string filePath;
82         time_t modTime;
83         size_t lengthBytes;
84         std::string hashHex;
85
86     };
87
88     typedef std::vector<HashCacheEntry> HashCache;
89     HashCache hashes;
90     bool hashCacheDirty;
91
92     struct Failure
93     {
94         SGPath path;
95         HTTPRepository::ResultCode error;
96     };
97
98     typedef std::vector<Failure> FailureList;
99     FailureList failures;
100
101     HTTPRepoPrivate(HTTPRepository* parent) :
102         hashCacheDirty(false),
103         p(parent),
104         isUpdating(false),
105         updateEverything(false),
106         status(HTTPRepository::REPO_NO_ERROR),
107         totalDownloaded(0)
108     { ; }
109
110     ~HTTPRepoPrivate();
111
112     HTTPRepository* p; // link back to outer
113     HTTP::Client* http;
114     std::string baseUrl;
115     SGPath basePath;
116     bool isUpdating;
117     bool updateEverything;
118     string_list updatePaths;
119     HTTPRepository::ResultCode status;
120     HTTPDirectory* rootDir;
121     size_t totalDownloaded;
122
123     void updateWaiting();
124
125     HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name,
126                                  size_t sz);
127     HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash,
128                                 size_t sz);
129
130     std::string hashForPath(const SGPath& p);
131     void updatedFileContents(const SGPath& p, const std::string& newHash);
132     void parseHashCache();
133     std::string computeHashForPath(const SGPath& p);
134     void writeHashCache();
135
136     void failedToGetRootIndex(HTTPRepository::ResultCode st);
137     void failedToUpdateChild(const SGPath& relativePath,
138                              HTTPRepository::ResultCode fileStatus);
139
140     typedef std::vector<RepoRequestPtr> RequestVector;
141     RequestVector queuedRequests,
142         activeRequests;
143
144     void makeRequest(RepoRequestPtr req);
145     void finishedRequest(const RepoRequestPtr& req);
146
147     HTTPDirectory* getOrCreateDirectory(const std::string& path);
148     bool deleteDirectory(const std::string& path);
149
150     typedef std::vector<HTTPDirectory*> DirectoryVector;
151     DirectoryVector directories;
152
153 };
154
155 class HTTPDirectory
156 {
157     struct ChildInfo
158     {
159         enum Type
160         {
161             FileType,
162             DirectoryType
163         };
164
165         ChildInfo(Type ty, const std::string & nameData, const std::string & hashData) :
166             type(ty),
167             name(nameData),
168             hash(hashData),
169             sizeInBytes(0)
170         {
171         }
172
173         ChildInfo(const ChildInfo& other) :
174             type(other.type),
175             name(other.name),
176             hash(other.hash),
177             sizeInBytes(other.sizeInBytes)
178         { }
179
180         void setSize(const std::string & sizeData)
181         {
182             sizeInBytes = ::strtol(sizeData.c_str(), NULL, 10);
183         }
184
185         bool operator<(const ChildInfo& other) const
186         {
187             return name < other.name;
188         }
189
190         Type type;
191         std::string name, hash;
192         size_t sizeInBytes;
193     };
194
195     typedef std::vector<ChildInfo> ChildInfoList;
196     ChildInfoList children;
197
198
199 public:
200     HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
201         _repository(repo),
202         _relativePath(path),
203         _state(DoNotUpdate)
204   {
205       assert(repo);
206
207       SGPath p(absolutePath());
208       if (p.exists()) {
209           try {
210               // already exists on disk
211               parseDirIndex(children);
212               std::sort(children.begin(), children.end());
213           } catch (sg_exception& ) {
214               // parsing cache failed
215               children.clear();
216           }
217       }
218   }
219
220     HTTPRepoPrivate* repository() const
221     {
222         return _repository;
223     }
224
225     std::string url() const
226     {
227         if (_relativePath.str().empty()) {
228             return _repository->baseUrl;
229         }
230
231         return _repository->baseUrl + "/" + _relativePath.str();
232     }
233
234     void dirIndexUpdated(const std::string& hash)
235     {
236         SGPath fpath(absolutePath());
237         fpath.append(".dirindex");
238         _repository->updatedFileContents(fpath, hash);
239
240         _state = Updated;
241
242         children.clear();
243         parseDirIndex(children);
244         std::sort(children.begin(), children.end());
245     }
246
247     void failedToUpdate(HTTPRepository::ResultCode status)
248     {
249         _state = UpdateFailed;
250         if (_relativePath.isNull()) {
251             // root dir failed
252             _repository->failedToGetRootIndex(status);
253         } else {
254             _repository->failedToUpdateChild(_relativePath, status);
255         }
256     }
257
258     void updateChildrenBasedOnHash()
259     {
260         // if we got here for a dir which is still updating or excluded
261         // from updates, just bail out right now.
262         if (_state != Updated) {
263             return;
264         }
265
266         string_list indexNames = indexChildren(),
267             toBeUpdated, orphans;
268         simgear::Dir d(absolutePath());
269         PathList fsChildren = d.children(0);
270         PathList::const_iterator it = fsChildren.begin();
271
272
273         for (; it != fsChildren.end(); ++it) {
274             ChildInfo info(it->isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
275                            it->file(), "");
276             std::string hash = hashForChild(info);
277
278             ChildInfoList::iterator c = findIndexChild(it->file());
279             if (c == children.end()) {
280                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "is orphan '" << it->file() << "'" );
281                 orphans.push_back(it->file());
282             } else if (c->hash != hash) {
283                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "hash mismatch'" << it->file() );
284                 // file exists, but hash mismatch, schedule update
285                 if (!hash.empty()) {
286                     SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists but hash is wrong for:" << it->file() );
287                     SG_LOG(SG_TERRASYNC, SG_DEBUG, "on disk:" << hash << " vs in info:" << c->hash);
288                 }
289
290                 toBeUpdated.push_back(it->file() );
291             } else {
292                 // file exists and hash is valid. If it's a directory,
293                 // perform a recursive check.
294                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists hash is good:" << it->file() );
295                 if (c->type == ChildInfo::DirectoryType) {
296                     SGPath p(relativePath());
297                     p.append(it->file());
298                     HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
299                     if (childDir->_state == NotUpdated) {
300                         childDir->_state = Updated;
301                     }
302                     childDir->updateChildrenBasedOnHash();
303                 }
304             }
305
306             // remove existing file system children from the index list,
307             // so we can detect new children
308             // https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Erase-Remove
309             indexNames.erase(std::remove(indexNames.begin(), indexNames.end(), it->file()), indexNames.end());
310         } // of real children iteration
311
312         // all remaining names in indexChilden are new children
313         toBeUpdated.insert(toBeUpdated.end(), indexNames.begin(), indexNames.end());
314
315         removeOrphans(orphans);
316         scheduleUpdates(toBeUpdated);
317     }
318
319     void markAsUpToDate()
320     {
321         _state = Updated;
322     }
323
324     void markAsUpdating()
325     {
326         assert(_state == NotUpdated);
327         _state = HTTPDirectory::UpdateInProgress;
328     }
329
330     void markAsEnabled()
331     {
332         // assert because this should only get invoked on newly created
333         // directory objects which are inside the sub-tree(s) to be updated
334         assert(_state == DoNotUpdate);
335         _state = NotUpdated;
336     }
337
338     void markSubtreeAsNeedingUpdate()
339     {
340         if (_state == Updated) {
341             _state = NotUpdated; // reset back to not-updated
342         }
343
344         ChildInfoList::iterator cit;
345         for (cit = children.begin(); cit != children.end(); ++cit) {
346             if (cit->type == ChildInfo::DirectoryType) {
347                 SGPath p(relativePath());
348                 p.append(cit->name);
349                 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
350                 childDir->markSubtreeAsNeedingUpdate();
351             }
352         } // of child iteration
353     }
354
355     void markSubtreeAsEnabled()
356     {
357         if (_state == DoNotUpdate) {
358             markAsEnabled();
359         }
360
361         ChildInfoList::iterator cit;
362         for (cit = children.begin(); cit != children.end(); ++cit) {
363             if (cit->type == ChildInfo::DirectoryType) {
364                 SGPath p(relativePath());
365                 p.append(cit->name);
366                 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
367                 childDir->markSubtreeAsEnabled();
368             }
369         } // of child iteration
370     }
371
372
373     void markAncestorChainAsEnabled()
374     {
375         if (_state == DoNotUpdate) {
376             markAsEnabled();
377         }
378
379         if (_relativePath.isNull()) {
380             return;
381         }
382
383         std::string prPath = _relativePath.dir();
384         if (prPath.empty()) {
385             _repository->rootDir->markAncestorChainAsEnabled();
386         } else {
387             HTTPDirectory* prDir = _repository->getOrCreateDirectory(prPath);
388             prDir->markAncestorChainAsEnabled();
389         }
390     }
391
392     void updateIfWaiting(const std::string& hash, size_t sz)
393     {
394         if (_state == NotUpdated) {
395             _repository->updateDir(this, hash, sz);
396             return;
397         }
398
399         if ((_state == DoNotUpdate) || (_state == UpdateInProgress)) {
400             return;
401         }
402
403         ChildInfoList::iterator cit;
404         for (cit = children.begin(); cit != children.end(); ++cit) {
405             if (cit->type == ChildInfo::DirectoryType) {
406                 SGPath p(relativePath());
407                 p.append(cit->name);
408                 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
409                 childDir->updateIfWaiting(cit->hash, cit->sizeInBytes);
410             }
411         } // of child iteration
412     }
413
414     void removeOrphans(const string_list& orphans)
415     {
416         string_list::const_iterator it;
417         for (it = orphans.begin(); it != orphans.end(); ++it) {
418             removeChild(*it);
419         }
420     }
421
422     string_list indexChildren() const
423     {
424         string_list r;
425         r.reserve(children.size());
426         ChildInfoList::const_iterator it;
427         for (it=children.begin(); it != children.end(); ++it) {
428             r.push_back(it->name);
429         }
430         return r;
431     }
432
433     void scheduleUpdates(const string_list& names)
434     {
435         string_list::const_iterator it;
436         for (it = names.begin(); it != names.end(); ++it) {
437             ChildInfoList::iterator cit = findIndexChild(*it);
438             if (cit == children.end()) {
439                 SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, unknown child:" << *it);
440                 continue;
441             }
442
443             SG_LOG(SG_TERRASYNC,SG_DEBUG, "scheduling update for " << *it );
444             if (cit->type == ChildInfo::FileType) {
445                 _repository->updateFile(this, *it, cit->sizeInBytes);
446             } else {
447                 SGPath p(relativePath());
448                 p.append(*it);
449                 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
450                 if (childDir->_state == DoNotUpdate) {
451                     SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, child:" << *it << " is marked do not update so skipping");
452                     continue;
453                 }
454
455                 _repository->updateDir(childDir, cit->hash, cit->sizeInBytes);
456             }
457         }
458     }
459
460     SGPath absolutePath() const
461     {
462         SGPath r(_repository->basePath);
463         r.append(_relativePath.str());
464         return r;
465     }
466
467     SGPath relativePath() const
468     {
469         return _relativePath;
470     }
471
472     void didUpdateFile(const std::string& file, const std::string& hash, size_t sz)
473     {
474         // check hash matches what we expected
475         ChildInfoList::iterator it = findIndexChild(file);
476         if (it == children.end()) {
477             SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
478         } else {
479             SGPath fpath(absolutePath());
480             fpath.append(file);
481
482             if (it->hash != hash) {
483                 // we don't erase the file on a hash mismatch, becuase if we're syncing during the
484                 // middle of a server-side update, the downloaded file may actually become valid.
485                 _repository->failedToUpdateChild(_relativePath, HTTPRepository::REPO_ERROR_CHECKSUM);
486             } else {
487                 _repository->updatedFileContents(fpath, hash);
488                 _repository->totalDownloaded += sz;
489             } // of hash matches
490         } // of found in child list
491     }
492
493     void didFailToUpdateFile(const std::string& file,
494                              HTTPRepository::ResultCode status)
495     {
496         SGPath fpath(_relativePath);
497         fpath.append(file);
498         _repository->failedToUpdateChild(fpath, status);
499     }
500 private:
501
502     struct ChildWithName
503     {
504         ChildWithName(const std::string& n) : name(n) {}
505         std::string name;
506
507         bool operator()(const ChildInfo& info) const
508         { return info.name == name; }
509     };
510
511     ChildInfoList::iterator findIndexChild(const std::string& name)
512     {
513         return std::find_if(children.begin(), children.end(), ChildWithName(name));
514     }
515
516     bool parseDirIndex(ChildInfoList& children)
517     {
518         SGPath p(absolutePath());
519         p.append(".dirindex");
520         if (!p.exists()) {
521             return false;
522         }
523
524         std::ifstream indexStream( p.c_str(), std::ios::in );
525
526         if ( !indexStream.is_open() ) {
527             throw sg_io_exception("cannot open dirIndex file", p);
528         }
529
530         while (!indexStream.eof() ) {
531             std::string line;
532             std::getline( indexStream, line );
533             line = simgear::strutils::strip(line);
534
535             // skip blank line or comment beginning with '#'
536             if( line.empty() || line[0] == '#' )
537                 continue;
538
539             string_list tokens = simgear::strutils::split( line, ":" );
540
541             std::string typeData = tokens[0];
542
543             if( typeData == "version" ) {
544                 if( tokens.size() < 2 ) {
545                     SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: missing version number in line '" << line << "'" );
546                     break;
547                 }
548                 if( tokens[1] != "1" ) {
549                     SG_LOG(SG_TERRASYNC, SG_WARN, "invalid .dirindex file: wrong version number '" << tokens[1] << "' (expected 1)" );
550                     break;
551                 }
552                 continue; // version is good, continue
553             }
554
555             if( typeData == "path" ) {
556                 continue; // ignore path, next line
557             }
558
559             if( tokens.size() < 3 ) {
560                 SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: not enough tokens in line '" << line << "' (ignoring line)" );
561                 continue;
562             }
563
564             if (typeData != "f" && typeData != "d" ) {
565                 SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 'd' or 'f', (ignoring line)" );
566                 continue;
567             }
568             children.push_back(ChildInfo(typeData == "f" ? ChildInfo::FileType : ChildInfo::DirectoryType, tokens[1], tokens[2]));
569
570             if (tokens.size() > 3) {
571                 children.back().setSize(tokens[3]);
572             }
573         }
574
575         return true;
576     }
577
578     void removeChild(const std::string& name)
579     {
580         SGPath p(absolutePath());
581         p.append(name);
582         bool ok;
583
584         SGPath fpath(_relativePath);
585         fpath.append(name);
586
587         if (p.isDir()) {
588             ok = _repository->deleteDirectory(fpath.str());
589         } else {
590             // remove the hash cache entry
591             _repository->updatedFileContents(p, std::string());
592             ok = p.remove();
593         }
594
595         if (!ok) {
596             SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << p);
597             throw sg_io_exception("Failed to remove existing file/dir:", p);
598         }
599     }
600
601     std::string hashForChild(const ChildInfo& child) const
602     {
603         SGPath p(absolutePath());
604         p.append(child.name);
605         if (child.type == ChildInfo::DirectoryType) {
606             p.append(".dirindex");
607         }
608         return _repository->hashForPath(p);
609     }
610
611   HTTPRepoPrivate* _repository;
612   SGPath _relativePath; // in URL and file-system space
613
614     typedef enum
615     {
616         NotUpdated,
617         UpdateInProgress,
618         Updated,
619         UpdateFailed,
620         DoNotUpdate
621     } State;
622
623     State _state;
624 };
625
626 HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
627     _d(new HTTPRepoPrivate(this))
628 {
629     _d->http = cl;
630     _d->basePath = base;
631     _d->rootDir = new HTTPDirectory(_d.get(), "");
632     _d->parseHashCache();
633 }
634
635 HTTPRepository::~HTTPRepository()
636 {
637 }
638
639 void HTTPRepository::setBaseUrl(const std::string &url)
640 {
641   _d->baseUrl = url;
642 }
643
644 std::string HTTPRepository::baseUrl() const
645 {
646   return _d->baseUrl;
647 }
648
649 HTTP::Client* HTTPRepository::http() const
650 {
651   return _d->http;
652 }
653
654 SGPath HTTPRepository::fsBase() const
655 {
656   return SGPath();
657 }
658
659 void HTTPRepository::update()
660 {
661     _d->rootDir->markSubtreeAsNeedingUpdate();
662     _d->updateWaiting();
663 }
664
665 void HTTPRepository::setEntireRepositoryMode()
666 {
667     if (!_d->updateEverything) {
668         // this is a one-way decision
669         _d->updateEverything = true;
670     }
671
672     // probably overkill but not expensive so let's check everything
673     // we have in case someone did something funky and switched from partial
674     // to 'whole repo' updating.
675     _d->rootDir->markSubtreeAsEnabled();
676 }
677
678
679 void HTTPRepository::addSubpath(const std::string& relPath)
680 {
681     if (_d->updateEverything) {
682         SG_LOG(SG_TERRASYNC, SG_WARN, "called HTTPRepository::addSubpath but updating everything");
683         return;
684     }
685
686     _d->updatePaths.push_back(relPath);
687
688     HTTPDirectory* dir = _d->getOrCreateDirectory(relPath);
689     dir->markSubtreeAsEnabled();
690     dir->markAncestorChainAsEnabled();
691
692     _d->updateWaiting();
693 }
694
695 bool HTTPRepository::isDoingSync() const
696 {
697     if (_d->status != REPO_NO_ERROR) {
698         return false;
699     }
700
701     return _d->isUpdating;
702 }
703
704 size_t HTTPRepository::bytesToDownload() const
705 {
706     size_t result = 0;
707
708     HTTPRepoPrivate::RequestVector::const_iterator r;
709     for (r = _d->queuedRequests.begin(); r != _d->queuedRequests.end(); ++r) {
710         result += (*r)->contentSize();
711     }
712
713     for (r = _d->activeRequests.begin(); r != _d->activeRequests.end(); ++r) {
714         result += (*r)->contentSize() - (*r)->responseBytesReceived();
715     }
716
717     return result;
718 }
719
720 size_t HTTPRepository::bytesDownloaded() const
721 {
722     size_t result = _d->totalDownloaded;
723
724     HTTPRepoPrivate::RequestVector::const_iterator r;
725     for (r = _d->activeRequests.begin(); r != _d->activeRequests.end(); ++r) {
726         result += (*r)->responseBytesReceived();
727     }
728
729     return result;
730 }
731
732 HTTPRepository::ResultCode
733 HTTPRepository::failure() const
734 {
735     if ((_d->status == REPO_NO_ERROR) && !_d->failures.empty()) {
736         return REPO_PARTIAL_UPDATE;
737     }
738
739     return _d->status;
740 }
741
742     class FileGetRequest : public HTTPRepoGetRequest
743     {
744     public:
745         FileGetRequest(HTTPDirectory* d, const std::string& file) :
746             HTTPRepoGetRequest(d, makeUrl(d, file)),
747             fileName(file)
748         {
749             pathInRepo = _directory->absolutePath();
750             pathInRepo.append(fileName);
751         }
752
753     protected:
754         virtual void gotBodyData(const char* s, int n)
755         {
756             if (!file.get()) {
757                 file.reset(new SGBinaryFile(pathInRepo.str()));
758                 if (!file->open(SG_IO_OUT)) {
759                   SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo);
760                   _directory->repository()->http->cancelRequest(this, "Unable to create output file");
761                 }
762
763                 sha1_init(&hashContext);
764             }
765
766             sha1_write(&hashContext, s, n);
767             file->write(s, n);
768         }
769
770         virtual void onDone()
771         {
772             file->close();
773
774             if (responseCode() == 200) {
775                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
776                 _directory->didUpdateFile(fileName, hash, contentSize());
777                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "got file " << fileName << " in " << _directory->absolutePath());
778             } else if (responseCode() == 404) {
779                 SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file not found on server: " << fileName << " for " << _directory->absolutePath());
780                 _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
781             } else {
782                 SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file download error on server: " << fileName << " for " << _directory->absolutePath() << ": " << responseCode() );
783                 _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_HTTP);
784             }
785
786             _directory->repository()->finishedRequest(this);
787         }
788
789         virtual void onFail()
790         {
791             file.reset();
792             if (pathInRepo.exists()) {
793                 pathInRepo.remove();
794             }
795
796             if (_directory) {
797                 _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_SOCKET);
798                 _directory->repository()->finishedRequest(this);
799             }
800         }
801     private:
802         static std::string makeUrl(HTTPDirectory* d, const std::string& file)
803         {
804             return d->url() + "/" + file;
805         }
806
807         std::string fileName; // if empty, we're getting the directory itself
808         SGPath pathInRepo;
809         simgear::sha1nfo hashContext;
810         std::auto_ptr<SGBinaryFile> file;
811     };
812
813     class DirGetRequest : public HTTPRepoGetRequest
814     {
815     public:
816         DirGetRequest(HTTPDirectory* d, const std::string& targetHash) :
817             HTTPRepoGetRequest(d, makeUrl(d)),
818             _isRootDir(false),
819             _targetHash(targetHash)
820         {
821             sha1_init(&hashContext);
822         }
823
824         void setIsRootDir()
825         {
826             _isRootDir = true;
827         }
828
829         bool isRootDir() const
830         {
831             return _isRootDir;
832         }
833
834     protected:
835         virtual void gotBodyData(const char* s, int n)
836         {
837             body += std::string(s, n);
838             sha1_write(&hashContext, s, n);
839         }
840
841         virtual void onDone()
842         {
843             if (responseCode() == 200) {
844                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
845                 if (!_targetHash.empty() && (hash != _targetHash)) {
846                     _directory->failedToUpdate(HTTPRepository::REPO_ERROR_CHECKSUM);
847                     _directory->repository()->finishedRequest(this);
848                     return;
849                 }
850
851                 std::string curHash = _directory->repository()->hashForPath(path());
852                 if (hash != curHash) {
853                     simgear::Dir d(_directory->absolutePath());
854                     if (!d.exists()) {
855                         if (!d.create(0700)) {
856                             throw sg_io_exception("Unable to create directory", d.path());
857                         }
858                     }
859
860                     // dir index data has changed, so write to disk and update
861                     // the hash accordingly
862                     std::ofstream of(pathInRepo().c_str(), std::ios::trunc | std::ios::out);
863                     if (!of.is_open()) {
864                         throw sg_io_exception("Failed to open directory index file for writing", pathInRepo().c_str());
865                     }
866
867                     of.write(body.data(), body.size());
868                     of.close();
869                     _directory->dirIndexUpdated(hash);
870                 } else {
871                     _directory->markAsUpToDate();
872                 }
873
874                 _directory->repository()->totalDownloaded += contentSize();
875
876                 try {
877                     // either way we've confirmed the index is valid so update
878                     // children now
879                     SGTimeStamp st;
880                     st.stamp();
881                     _directory->updateChildrenBasedOnHash();
882                     SG_LOG(SG_TERRASYNC, SG_INFO, "after update of:" << _directory->absolutePath() << " child update took:" << st.elapsedMSec());
883                 } catch (sg_exception& ) {
884                     _directory->failedToUpdate(HTTPRepository::REPO_ERROR_IO);
885                 }
886             } else if (responseCode() == 404) {
887                 _directory->failedToUpdate(HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
888             } else {
889                 _directory->failedToUpdate(HTTPRepository::REPO_ERROR_HTTP);
890             }
891
892             _directory->repository()->finishedRequest(this);
893         }
894
895         virtual void onFail()
896         {
897             if (_directory) {
898                 _directory->failedToUpdate(HTTPRepository::REPO_ERROR_SOCKET);
899                 _directory->repository()->finishedRequest(this);
900             }
901         }
902     private:
903         static std::string makeUrl(HTTPDirectory* d)
904         {
905             return d->url() + "/.dirindex";
906         }
907
908         SGPath pathInRepo() const
909         {
910             SGPath p(_directory->absolutePath());
911             p.append(".dirindex");
912             return p;
913         }
914
915         simgear::sha1nfo hashContext;
916         std::string body;
917         bool _isRootDir; ///< is this the repository root?
918         std::string _targetHash;
919     };
920
921     HTTPRepoPrivate::~HTTPRepoPrivate()
922     {
923         // take a copy since cancelRequest will fail and hence remove
924         // remove activeRequests, invalidating any iterator to it.
925         RequestVector copyOfActive(activeRequests);
926         RequestVector::iterator rq;
927         for (rq = copyOfActive.begin(); rq != copyOfActive.end(); ++rq) {
928             http->cancelRequest(*rq, "Repository object deleted");
929         }
930
931         DirectoryVector::iterator it;
932         for (it=directories.begin(); it != directories.end(); ++it) {
933             delete *it;
934         }
935     }
936
937     HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name, size_t sz)
938     {
939         RepoRequestPtr r(new FileGetRequest(dir, name));
940         r->setContentSize(sz);
941         makeRequest(r);
942         return r;
943     }
944
945     HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash, size_t sz)
946     {
947         dir->markAsUpdating();
948         RepoRequestPtr r(new DirGetRequest(dir, hash));
949         r->setContentSize(sz);
950         makeRequest(r);
951         return r;
952     }
953
954
955     class HashEntryWithPath
956     {
957     public:
958         HashEntryWithPath(const std::string& p) : path(p) {}
959         bool operator()(const HTTPRepoPrivate::HashCacheEntry& entry) const
960         { return entry.filePath == path; }
961     private:
962         std::string path;
963     };
964
965     std::string HTTPRepoPrivate::hashForPath(const SGPath& p)
966     {
967         HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
968         if (it != hashes.end()) {
969             // ensure data on disk hasn't changed.
970             // we could also use the file type here if we were paranoid
971             if ((p.sizeInBytes() == it->lengthBytes) && (p.modTime() == it->modTime)) {
972                 return it->hashHex;
973             }
974
975             // entry in the cache, but it's stale so remove and fall through
976             hashes.erase(it);
977         }
978
979         std::string hash = computeHashForPath(p);
980         updatedFileContents(p, hash);
981         return hash;
982     }
983
984     std::string HTTPRepoPrivate::computeHashForPath(const SGPath& p)
985     {
986         if (!p.exists())
987             return std::string();
988         sha1nfo info;
989         sha1_init(&info);
990         char* buf = static_cast<char*>(malloc(1024 * 1024));
991         size_t readLen;
992         SGBinaryFile f(p.str());
993         if (!f.open(SG_IO_IN)) {
994             throw sg_io_exception("Couldn't open file for compute hash", p);
995         }
996         while ((readLen = f.read(buf, 1024 * 1024)) > 0) {
997             sha1_write(&info, buf, readLen);
998         }
999
1000         f.close();
1001         free(buf);
1002         std::string hashBytes((char*) sha1_result(&info), HASH_LENGTH);
1003         return strutils::encodeHex(hashBytes);
1004     }
1005
1006     void HTTPRepoPrivate::updatedFileContents(const SGPath& p, const std::string& newHash)
1007     {
1008         // remove the existing entry
1009         HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
1010         if (it != hashes.end()) {
1011             hashes.erase(it);
1012             hashCacheDirty = true;
1013         }
1014
1015         if (newHash.empty()) {
1016             return; // we're done
1017         }
1018
1019         // use a cloned SGPath and reset its caching to force one stat() call
1020         SGPath p2(p);
1021         p2.set_cached(false);
1022         p2.set_cached(true);
1023
1024         HashCacheEntry entry;
1025         entry.filePath = p.str();
1026         entry.hashHex = newHash;
1027         entry.modTime = p2.modTime();
1028         entry.lengthBytes = p2.sizeInBytes();
1029         hashes.push_back(entry);
1030
1031         hashCacheDirty = true;
1032     }
1033
1034     void HTTPRepoPrivate::writeHashCache()
1035     {
1036         if (!hashCacheDirty) {
1037             return;
1038         }
1039
1040         SGPath cachePath = basePath;
1041         cachePath.append(".hashes");
1042
1043         std::ofstream stream(cachePath.c_str(),std::ios::out | std::ios::trunc);
1044         HashCache::const_iterator it;
1045         for (it = hashes.begin(); it != hashes.end(); ++it) {
1046             stream << it->filePath << ":" << it->modTime << ":"
1047             << it->lengthBytes << ":" << it->hashHex << "\n";
1048         }
1049         stream.close();
1050         hashCacheDirty = false;
1051     }
1052
1053     void HTTPRepoPrivate::parseHashCache()
1054     {
1055         hashes.clear();
1056         SGPath cachePath = basePath;
1057         cachePath.append(".hashes");
1058         if (!cachePath.exists()) {
1059             return;
1060         }
1061
1062         std::ifstream stream(cachePath.c_str(), std::ios::in);
1063
1064         while (!stream.eof()) {
1065             std::string line;
1066             std::getline(stream,line);
1067             line = simgear::strutils::strip(line);
1068             if( line.empty() || line[0] == '#' )
1069                 continue;
1070
1071             string_list tokens = simgear::strutils::split( line, ":" );
1072             if( tokens.size() < 4 ) {
1073                 SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
1074                 continue;
1075             }
1076             const std::string nameData = simgear::strutils::strip(tokens[0]);
1077             const std::string timeData = simgear::strutils::strip(tokens[1]);
1078             const std::string sizeData = simgear::strutils::strip(tokens[2]);
1079             const std::string hashData = simgear::strutils::strip(tokens[3]);
1080
1081             if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty() ) {
1082                 SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
1083                 continue;
1084             }
1085
1086             HashCacheEntry entry;
1087             entry.filePath = nameData;
1088             entry.hashHex = hashData;
1089             entry.modTime = strtol(timeData.c_str(), NULL, 10);
1090             entry.lengthBytes = strtol(sizeData.c_str(), NULL, 10);
1091             hashes.push_back(entry);
1092         }
1093     }
1094
1095     class DirectoryWithPath
1096     {
1097     public:
1098         DirectoryWithPath(const std::string& p) : path(p) {}
1099         bool operator()(const HTTPDirectory* entry) const
1100         { return entry->relativePath().str() == path; }
1101     private:
1102         std::string path;
1103     };
1104
1105     HTTPDirectory* HTTPRepoPrivate::getOrCreateDirectory(const std::string& path)
1106     {
1107         DirectoryWithPath p(path);
1108         DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
1109         if (it != directories.end()) {
1110             return *it;
1111         }
1112
1113         HTTPDirectory* d = new HTTPDirectory(this, path);
1114         directories.push_back(d);
1115         if (updateEverything) {
1116             d->markAsEnabled();
1117         } else {
1118             string_list::const_iterator s;
1119             bool shouldUpdate = false;
1120
1121             for (s = updatePaths.begin(); s != updatePaths.end(); ++s) {
1122                 size_t minLen = std::min(path.size(), s->size());
1123                 if (s->compare(0, minLen, path, 0, minLen) == 0) {
1124                     shouldUpdate = true;
1125                     break;
1126                 }
1127             } // of paths iteration
1128
1129             if (shouldUpdate) {
1130                 d->markAsEnabled();
1131             }
1132         }
1133
1134         return d;
1135     }
1136
1137     bool HTTPRepoPrivate::deleteDirectory(const std::string& path)
1138     {
1139         DirectoryWithPath p(path);
1140         DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
1141         if (it != directories.end()) {
1142             HTTPDirectory* d = *it;
1143             directories.erase(it);
1144             Dir dir(d->absolutePath());
1145             bool result = dir.remove(true);
1146             delete d;
1147
1148             // update the hash cache too
1149             updatedFileContents(d->absolutePath(), std::string());
1150
1151             return result;
1152         }
1153
1154         return false;
1155     }
1156
1157     void HTTPRepoPrivate::makeRequest(RepoRequestPtr req)
1158     {
1159         if (activeRequests.size() > 4) {
1160             queuedRequests.push_back(req);
1161         } else {
1162             activeRequests.push_back(req);
1163             http->makeRequest(req);
1164         }
1165     }
1166
1167     void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req)
1168     {
1169         RequestVector::iterator it = std::find(activeRequests.begin(), activeRequests.end(), req);
1170         // in some cases, for example a checksum failure, we clear the active
1171         // and queued request vectors, so the ::find above can fail
1172         if (it != activeRequests.end()) {
1173             activeRequests.erase(it);
1174         }
1175
1176         if (!queuedRequests.empty()) {
1177             RepoRequestPtr rr = queuedRequests.front();
1178             queuedRequests.erase(queuedRequests.begin());
1179             activeRequests.push_back(rr);
1180             http->makeRequest(rr);
1181         }
1182
1183         writeHashCache();
1184
1185         if (activeRequests.empty() && queuedRequests.empty()) {
1186             isUpdating = false;
1187         }
1188     }
1189
1190     void HTTPRepoPrivate::failedToGetRootIndex(HTTPRepository::ResultCode st)
1191     {
1192         SG_LOG(SG_TERRASYNC, SG_WARN, "Failed to get root of repo:" << baseUrl);
1193         status = st;
1194     }
1195
1196     void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath,
1197                                               HTTPRepository::ResultCode fileStatus)
1198     {
1199         if (fileStatus == HTTPRepository::REPO_ERROR_CHECKSUM) {
1200             // stop updating, and mark repository as failed, becuase this
1201             // usually indicates we need to start a fresh update from the
1202             // root.
1203             // (we could issue a retry here, but we leave that to higher layers)
1204             status = fileStatus;
1205
1206             queuedRequests.clear();
1207
1208             RequestVector copyOfActive(activeRequests);
1209             RequestVector::iterator rq;
1210             for (rq = copyOfActive.begin(); rq != copyOfActive.end(); ++rq) {
1211                 //SG_LOG(SG_TERRASYNC, SG_DEBUG, "cancelling request for:" << (*rq)->url());
1212                 http->cancelRequest(*rq, "Repository updated failed");
1213             }
1214
1215
1216             SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update repository:" << baseUrl
1217                    << ", possibly modified during sync");
1218         }
1219
1220         Failure f;
1221         f.path = relativePath;
1222         f.error = fileStatus;
1223         failures.push_back(f);
1224
1225         SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " code:" << fileStatus);
1226     }
1227
1228     void HTTPRepoPrivate::updateWaiting()
1229     {
1230         if (!isUpdating) {
1231             status = HTTPRepository::REPO_NO_ERROR;
1232             isUpdating = true;
1233             failures.clear();
1234         }
1235
1236         // find to-be-updated sub-trees and kick them off
1237         rootDir->updateIfWaiting(std::string(), 0);
1238
1239         // maybe there was nothing to do
1240         if (activeRequests.empty()) {
1241             status = HTTPRepository::REPO_NO_ERROR;
1242             isUpdating = false;
1243         }
1244     }
1245
1246 } // of namespace simgear