]> git.mxchange.org Git - simgear.git/blob - simgear/io/HTTPRepository.cxx
d27850dd724c22f217ce8d283e40ec017ef77ff6
[simgear.git] / simgear / io / HTTPRepository.cxx
1 // HTTPRepository.cxx -- plain HTTP TerraSync remote client
2 //
3 // Copyright (C) 2016  James Turner <zakalawe@mac.com>
4 //
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 2 of the
8 // License, or (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 // General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
18
19 #include "HTTPRepository.hxx"
20
21 #include <simgear_config.h>
22
23 #include <iostream>
24 #include <cassert>
25 #include <algorithm>
26 #include <sstream>
27 #include <map>
28 #include <set>
29 #include <fstream>
30 #include <limits>
31 #include <cstdlib>
32
33 #include <fcntl.h>
34
35 #include "simgear/debug/logstream.hxx"
36 #include "simgear/misc/strutils.hxx"
37 #include <simgear/misc/sg_dir.hxx>
38 #include <simgear/io/HTTPClient.hxx>
39 #include <simgear/io/sg_file.hxx>
40 #include <simgear/misc/sgstream.hxx>
41 #include <simgear/structure/exception.hxx>
42 #include <simgear/timing/timestamp.hxx>
43
44 #include <simgear/misc/sg_hash.hxx>
45
46 namespace simgear
47 {
48
49     class HTTPDirectory;
50
51     class HTTPRepoGetRequest : public HTTP::Request
52     {
53     public:
54         HTTPRepoGetRequest(HTTPDirectory* d, const std::string& u) :
55             HTTP::Request(u),
56             _directory(d)
57         {
58         }
59
60         virtual void cancel();
61
62         size_t contentSize() const
63         {
64             return _contentSize;
65         }
66
67         void setContentSize(size_t sz)
68         {
69             _contentSize = sz;
70         }
71     protected:
72         HTTPDirectory* _directory;
73         size_t _contentSize;
74     };
75
76     typedef SGSharedPtr<HTTPRepoGetRequest> RepoRequestPtr;
77
78 class HTTPRepoPrivate
79 {
80 public:
81     struct HashCacheEntry
82     {
83         std::string filePath;
84         time_t modTime;
85         size_t lengthBytes;
86         std::string hashHex;
87
88     };
89
90     typedef std::vector<HashCacheEntry> HashCache;
91     HashCache hashes;
92     bool hashCacheDirty;
93
94     struct Failure
95     {
96         SGPath path;
97         AbstractRepository::ResultCode error;
98     };
99
100     typedef std::vector<Failure> FailureList;
101     FailureList failures;
102
103     HTTPRepoPrivate(HTTPRepository* parent) :
104         hashCacheDirty(false),
105         p(parent),
106         isUpdating(false),
107         status(AbstractRepository::REPO_NO_ERROR),
108         totalDownloaded(0)
109     { ; }
110
111     ~HTTPRepoPrivate();
112
113     HTTPRepository* p; // link back to outer
114     HTTP::Client* http;
115     std::string baseUrl;
116     SGPath basePath;
117     bool isUpdating;
118     AbstractRepository::ResultCode status;
119     HTTPDirectory* rootDir;
120     size_t totalDownloaded;
121
122     HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name,
123                                  size_t sz);
124     HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash,
125                                 size_t sz);
126
127     std::string hashForPath(const SGPath& p);
128     void updatedFileContents(const SGPath& p, const std::string& newHash);
129     void parseHashCache();
130     std::string computeHashForPath(const SGPath& p);
131     void writeHashCache();
132
133     void failedToGetRootIndex(AbstractRepository::ResultCode st);
134     void failedToUpdateChild(const SGPath& relativePath,
135                              AbstractRepository::ResultCode fileStatus);
136
137     typedef std::vector<RepoRequestPtr> RequestVector;
138     RequestVector queuedRequests,
139         activeRequests;
140
141     void makeRequest(RepoRequestPtr req);
142     void finishedRequest(const RepoRequestPtr& req);
143
144     HTTPDirectory* getOrCreateDirectory(const std::string& path);
145     bool deleteDirectory(const std::string& path);
146
147     typedef std::vector<HTTPDirectory*> DirectoryVector;
148     DirectoryVector directories;
149
150 };
151
152 class HTTPDirectory
153 {
154     struct ChildInfo
155     {
156         enum Type
157         {
158             FileType,
159             DirectoryType
160         };
161
162         ChildInfo(Type ty, const std::string & nameData, const std::string & hashData) :
163             type(ty),
164             name(nameData),
165             hash(hashData),
166             sizeInBytes(0)
167         {
168         }
169
170         ChildInfo(const ChildInfo& other) :
171             type(other.type),
172             name(other.name),
173             hash(other.hash),
174             sizeInBytes(other.sizeInBytes)
175         { }
176
177         void setSize(const std::string & sizeData)
178         {
179             sizeInBytes = ::strtol(sizeData.c_str(), NULL, 10);
180         }
181
182         bool operator<(const ChildInfo& other) const
183         {
184             return name < other.name;
185         }
186
187         Type type;
188         std::string name, hash;
189         size_t sizeInBytes;
190     };
191
192     typedef std::vector<ChildInfo> ChildInfoList;
193     ChildInfoList children;
194
195 public:
196     HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
197         _repository(repo),
198         _relativePath(path)
199   {
200       assert(repo);
201
202       SGPath p(absolutePath());
203       if (p.exists()) {
204           try {
205               // already exists on disk
206               parseDirIndex(children);
207               std::sort(children.begin(), children.end());
208           } catch (sg_exception& ) {
209               // parsing cache failed
210               children.clear();
211           }
212       }
213   }
214
215     HTTPRepoPrivate* repository() const
216     {
217         return _repository;
218     }
219
220     std::string url() const
221     {
222         if (_relativePath.str().empty()) {
223             return _repository->baseUrl;
224         }
225
226         return _repository->baseUrl + "/" + _relativePath.str();
227     }
228
229     void dirIndexUpdated(const std::string& hash)
230     {
231         SGPath fpath(_relativePath);
232         fpath.append(".dirindex");
233         _repository->updatedFileContents(fpath, hash);
234
235         children.clear();
236         parseDirIndex(children);
237         std::sort(children.begin(), children.end());
238     }
239
240     void failedToUpdate(AbstractRepository::ResultCode status)
241     {
242         if (_relativePath.isNull()) {
243             // root dir failed
244             _repository->failedToGetRootIndex(status);
245         } else {
246             _repository->failedToUpdateChild(_relativePath, status);
247         }
248     }
249
250     void updateChildrenBasedOnHash()
251     {
252         //SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
253
254         string_list indexNames = indexChildren(),
255             toBeUpdated, orphans;
256         simgear::Dir d(absolutePath());
257         SG_LOG(SG_TERRASYNC, SG_DEBUG, "Dir created for: '" << absolutePath() );
258         PathList fsChildren = d.children(0);
259         SG_LOG(SG_TERRASYNC, SG_DEBUG, "Dir has children: '" << fsChildren.size() );
260         PathList::const_iterator it = fsChildren.begin();
261
262
263         for (; it != fsChildren.end(); ++it) {
264             SG_LOG(SG_TERRASYNC, SG_DEBUG, "processing child: '" << it->str() << "', file=" << it->file() << ",isDir=" << it->isDir() );
265             ChildInfo info(it->isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
266                            it->file(), "");
267             std::string hash = hashForChild(info);
268             SG_LOG(SG_TERRASYNC, SG_DEBUG, "hash is: '" << hash << "'" );
269
270             ChildInfoList::iterator c = findIndexChild(it->file());
271             if (c == children.end()) {
272                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "is orphan '" << it->file() << "'" );
273                 orphans.push_back(it->file());
274             } else if (c->hash != hash) {
275                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "hash mismatch'" << it->file() << "', c->name=" << c->name );
276                 // file exists, but hash mismatch, schedule update
277                 if (!hash.empty()) {
278                     SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists but hash is wrong for:" << c->name);
279                     SG_LOG(SG_TERRASYNC, SG_DEBUG, "on disk:" << hash << " vs in info:" << c->hash);
280                 }
281
282                 toBeUpdated.push_back(c->name);
283             } else {
284                 // file exists and hash is valid. If it's a directory,
285                 // perform a recursive check.
286                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists hash is good:" << c->name);
287                 if (c->type == ChildInfo::DirectoryType) {
288                     SG_LOG(SG_TERRASYNC, SG_DEBUG, "going recursive for:" << c->name);
289                     SGPath p(relativePath());
290                     p.append(c->name);
291                     HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
292                     childDir->updateChildrenBasedOnHash();
293                 }
294             }
295
296             // remove existing file system children from the index list,
297             // so we can detect new children
298             SG_LOG(SG_TERRASYNC, SG_DEBUG, "looking for name in indexNames:" << c->name);
299             string_list::iterator it = std::find(indexNames.begin(), indexNames.end(), c->name);
300             if (it != indexNames.end()) {
301                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "found name in indexNames, erasing:" << c->name);
302                 indexNames.erase(it);
303             }
304         } // of real children iteration
305
306         // all remaining names in indexChilden are new children
307         toBeUpdated.insert(toBeUpdated.end(), indexNames.begin(), indexNames.end());
308
309         removeOrphans(orphans);
310         scheduleUpdates(toBeUpdated);
311     }
312
313     void removeOrphans(const string_list& orphans)
314     {
315         string_list::const_iterator it;
316         for (it = orphans.begin(); it != orphans.end(); ++it) {
317             removeChild(*it);
318         }
319     }
320
321     string_list indexChildren() const
322     {
323         string_list r;
324         r.reserve(children.size());
325         ChildInfoList::const_iterator it;
326         for (it=children.begin(); it != children.end(); ++it) {
327             r.push_back(it->name);
328         }
329         return r;
330     }
331
332     void scheduleUpdates(const string_list& names)
333     {
334         string_list::const_iterator it;
335         for (it = names.begin(); it != names.end(); ++it) {
336             ChildInfoList::iterator cit = findIndexChild(*it);
337             if (cit == children.end()) {
338                 SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, unknown child:" << *it);
339                 continue;
340             }
341
342             if (cit->type == ChildInfo::FileType) {
343                 _repository->updateFile(this, *it, cit->sizeInBytes);
344             } else {
345                 SGPath p(relativePath());
346                 p.append(*it);
347                 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
348                 _repository->updateDir(childDir, cit->hash, cit->sizeInBytes);
349             }
350         }
351     }
352
353     SGPath absolutePath() const
354     {
355         SGPath r(_repository->basePath);
356         r.append(_relativePath.str());
357         return r;
358     }
359
360     SGPath relativePath() const
361     {
362         return _relativePath;
363     }
364
365     void didUpdateFile(const std::string& file, const std::string& hash, size_t sz)
366     {
367         // check hash matches what we expected
368         ChildInfoList::iterator it = findIndexChild(file);
369         if (it == children.end()) {
370             SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
371         } else {
372             SGPath fpath(_relativePath);
373             fpath.append(file);
374
375             if (it->hash != hash) {
376                 _repository->failedToUpdateChild(_relativePath, AbstractRepository::REPO_ERROR_CHECKSUM);
377             } else {
378                 _repository->updatedFileContents(fpath, hash);
379                 _repository->totalDownloaded += sz;
380                 //SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath);
381             } // of hash matches
382         } // of found in child list
383     }
384
385     void didFailToUpdateFile(const std::string& file,
386                              AbstractRepository::ResultCode status)
387     {
388         SGPath fpath(_relativePath);
389         fpath.append(file);
390         _repository->failedToUpdateChild(fpath, status);
391     }
392 private:
393
394     struct ChildWithName
395     {
396         ChildWithName(const std::string& n) : name(n) {}
397         std::string name;
398
399         bool operator()(const ChildInfo& info) const
400         { return info.name == name; }
401     };
402
403     ChildInfoList::iterator findIndexChild(const std::string& name)
404     {
405         return std::find_if(children.begin(), children.end(), ChildWithName(name));
406     }
407
408     bool parseDirIndex(ChildInfoList& children)
409     {
410         SGPath p(absolutePath());
411         p.append(".dirindex");
412         if (!p.exists()) {
413             return false;
414         }
415
416         std::ifstream indexStream( p.c_str(), std::ios::in );
417
418         if ( !indexStream.is_open() ) {
419             throw sg_io_exception("cannot open dirIndex file", p);
420         }
421
422         while (!indexStream.eof() ) {
423             std::string line;
424             std::getline( indexStream, line );
425             line = simgear::strutils::strip(line);
426
427             // skip blank line or comment beginning with '#'
428             if( line.empty() || line[0] == '#' )
429                 continue;
430
431             string_list tokens = simgear::strutils::split( line, ":" );
432
433             std::string typeData = tokens[0];
434
435             if( typeData == "version" ) {
436                 if( tokens.size() < 2 ) {
437                     SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: missing version number in line '" << line << "'" );
438                     break;
439                 }
440                 if( tokens[1] != "1" ) {
441                     SG_LOG(SG_TERRASYNC, SG_WARN, "invalid .dirindex file: wrong version number '" << tokens[1] << "' (expected 1)" );
442                     break;
443                 }
444             }
445
446             if( typeData == "path" ) {
447                 continue; // ignore path, next line
448             }
449
450             if( tokens.size() < 3 ) {
451                 SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: not enough tokens in line '" << line << "' (ignoring line)" );
452                 continue;
453             }
454
455             if (typeData != "f" && typeData != "d" ) {
456                 SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 'd' or 'f', (ignoring line)" );
457                 continue;
458             }
459             children.push_back(ChildInfo(typeData == "f" ? ChildInfo::FileType : ChildInfo::DirectoryType, tokens[1], tokens[2]));
460
461             if (tokens.size() > 3) {
462                 children.back().setSize(tokens[3]);
463             }
464         }
465
466         return true;
467     }
468
469     void removeChild(const std::string& name)
470     {
471         SGPath p(absolutePath());
472         p.append(name);
473         bool ok;
474
475         SGPath fpath(_relativePath);
476         fpath.append(name);
477
478         if (p.isDir()) {
479             ok = _repository->deleteDirectory(fpath.str());
480         } else {
481             // remove the hash cache entry
482             _repository->updatedFileContents(fpath, std::string());
483             ok = p.remove();
484         }
485
486         if (!ok) {
487             SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << p);
488             throw sg_io_exception("Failed to remove existing file/dir:", p);
489         }
490     }
491
492     std::string hashForChild(const ChildInfo& child) const
493     {
494         SGPath p(absolutePath());
495         p.append(child.name);
496         if (child.type == ChildInfo::DirectoryType) {
497             p.append(".dirindex");
498         }
499         return _repository->hashForPath(p);
500     }
501
502   HTTPRepoPrivate* _repository;
503   SGPath _relativePath; // in URL and file-system space
504
505
506 };
507
508 HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
509     _d(new HTTPRepoPrivate(this))
510 {
511     _d->http = cl;
512     _d->basePath = base;
513     _d->rootDir = new HTTPDirectory(_d.get(), "");
514     _d->parseHashCache();
515 }
516
517 HTTPRepository::~HTTPRepository()
518 {
519 }
520
521 void HTTPRepository::setBaseUrl(const std::string &url)
522 {
523   _d->baseUrl = url;
524 }
525
526 std::string HTTPRepository::baseUrl() const
527 {
528   return _d->baseUrl;
529 }
530
531 HTTP::Client* HTTPRepository::http() const
532 {
533   return _d->http;
534 }
535
536 SGPath HTTPRepository::fsBase() const
537 {
538   return SGPath();
539 }
540
541 void HTTPRepository::update()
542 {
543     if (_d->isUpdating) {
544         return;
545     }
546
547     _d->status = REPO_NO_ERROR;
548     _d->isUpdating = true;
549     _d->failures.clear();
550     _d->updateDir(_d->rootDir, std::string(), 0);
551 }
552
553 bool HTTPRepository::isDoingSync() const
554 {
555     if (_d->status != REPO_NO_ERROR) {
556         return false;
557     }
558
559     return _d->isUpdating;
560 }
561
562 size_t HTTPRepository::bytesToDownload() const
563 {
564     size_t result = 0;
565
566     HTTPRepoPrivate::RequestVector::const_iterator r;
567     for (r = _d->queuedRequests.begin(); r != _d->queuedRequests.end(); ++r) {
568         result += (*r)->contentSize();
569     }
570
571     for (r = _d->activeRequests.begin(); r != _d->activeRequests.end(); ++r) {
572         result += (*r)->contentSize() - (*r)->responseBytesReceived();
573     }
574
575     return result;
576 }
577
578 size_t HTTPRepository::bytesDownloaded() const
579 {
580     size_t result = _d->totalDownloaded;
581
582     HTTPRepoPrivate::RequestVector::const_iterator r;
583     for (r = _d->activeRequests.begin(); r != _d->activeRequests.end(); ++r) {
584         result += (*r)->responseBytesReceived();
585     }
586
587     return result;
588 }
589
590 AbstractRepository::ResultCode
591 HTTPRepository::failure() const
592 {
593     if ((_d->status == REPO_NO_ERROR) && !_d->failures.empty()) {
594         return REPO_PARTIAL_UPDATE;
595     }
596
597     return _d->status;
598 }
599
600     void HTTPRepoGetRequest::cancel()
601     {
602         _directory->repository()->http->cancelRequest(this, "Reposiotry cancelled");
603         _directory = 0;
604     }
605
606     class FileGetRequest : public HTTPRepoGetRequest
607     {
608     public:
609         FileGetRequest(HTTPDirectory* d, const std::string& file) :
610             HTTPRepoGetRequest(d, makeUrl(d, file)),
611             fileName(file)
612         {
613             pathInRepo = _directory->absolutePath();
614             pathInRepo.append(fileName);
615             //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url());
616         }
617
618     protected:
619         virtual void gotBodyData(const char* s, int n)
620         {
621             if (!file.get()) {
622                 file.reset(new SGFile(pathInRepo.str()));
623                 if (!file->open(SG_IO_OUT)) {
624                   SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo);
625                   _directory->repository()->http->cancelRequest(this, "Unable to create output file");
626                 }
627
628                 sha1_init(&hashContext);
629             }
630
631             sha1_write(&hashContext, s, n);
632             file->write(s, n);
633         }
634
635         virtual void onDone()
636         {
637             file->close();
638             if (responseCode() == 200) {
639                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
640                 _directory->didUpdateFile(fileName, hash, contentSize());
641                 //SG_LOG(SG_TERRASYNC, SG_INFO, "got file " << fileName << " in " << _directory->absolutePath());
642             } else if (responseCode() == 404) {
643                 _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
644             } else {
645                 _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_HTTP);
646             }
647
648             _directory->repository()->finishedRequest(this);
649         }
650
651         virtual void onFail()
652         {
653             file.reset();
654             if (pathInRepo.exists()) {
655                 pathInRepo.remove();
656             }
657             
658             if (_directory) {
659                 _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_SOCKET);
660                 _directory->repository()->finishedRequest(this);
661             }
662         }
663     private:
664         static std::string makeUrl(HTTPDirectory* d, const std::string& file)
665         {
666             return d->url() + "/" + file;
667         }
668
669         std::string fileName; // if empty, we're getting the directory itself
670         SGPath pathInRepo;
671         simgear::sha1nfo hashContext;
672         std::auto_ptr<SGFile> file;
673     };
674
675     class DirGetRequest : public HTTPRepoGetRequest
676     {
677     public:
678         DirGetRequest(HTTPDirectory* d, const std::string& targetHash) :
679             HTTPRepoGetRequest(d, makeUrl(d)),
680             _isRootDir(false),
681             _targetHash(targetHash)
682         {
683             sha1_init(&hashContext);
684            //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url());
685         }
686
687         void setIsRootDir()
688         {
689             _isRootDir = true;
690         }
691
692         bool isRootDir() const
693         {
694             return _isRootDir;
695         }
696
697     protected:
698         virtual void gotBodyData(const char* s, int n)
699         {
700             body += std::string(s, n);
701             sha1_write(&hashContext, s, n);
702         }
703
704         virtual void onDone()
705         {
706             if (responseCode() == 200) {
707                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
708                 if (!_targetHash.empty() && (hash != _targetHash)) {
709                     _directory->failedToUpdate(AbstractRepository::REPO_ERROR_CHECKSUM);
710                     _directory->repository()->finishedRequest(this);
711                     return;
712                 }
713
714                 std::string curHash = _directory->repository()->hashForPath(path());
715                 if (hash != curHash) {
716                     simgear::Dir d(_directory->absolutePath());
717                     if (!d.exists()) {
718                         if (!d.create(0700)) {
719                             throw sg_io_exception("Unable to create directory", d.path());
720                         }
721                     }
722
723                     // dir index data has changed, so write to disk and update
724                     // the hash accordingly
725                     std::ofstream of(pathInRepo().c_str(), std::ios::trunc | std::ios::out);
726                     if (!of.is_open()) {
727                         throw sg_io_exception("Failed to open directory index file for writing", pathInRepo().c_str());
728                     }
729
730                     of.write(body.data(), body.size());
731                     of.close();
732                     _directory->dirIndexUpdated(hash);
733
734                     //SG_LOG(SG_TERRASYNC, SG_INFO, "updated dir index " << _directory->absolutePath());
735                 }
736
737                 _directory->repository()->totalDownloaded += contentSize();
738
739                 try {
740                     // either way we've confirmed the index is valid so update
741                     // children now
742                     SGTimeStamp st;
743                     st.stamp();
744                     _directory->updateChildrenBasedOnHash();
745                     SG_LOG(SG_TERRASYNC, SG_INFO, "after update of:" << _directory->absolutePath() << " child update took:" << st.elapsedMSec());
746                 } catch (sg_exception& ) {
747                     _directory->failedToUpdate(AbstractRepository::REPO_ERROR_IO);
748                 }
749             } else if (responseCode() == 404) {
750                 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
751             } else {
752                 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_HTTP);
753             }
754
755             _directory->repository()->finishedRequest(this);
756         }
757
758         virtual void onFail()
759         {
760             if (_directory) {
761                 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_SOCKET);
762                 _directory->repository()->finishedRequest(this);
763             }
764         }
765     private:
766         static std::string makeUrl(HTTPDirectory* d)
767         {
768             return d->url() + "/.dirindex";
769         }
770
771         SGPath pathInRepo() const
772         {
773             SGPath p(_directory->absolutePath());
774             p.append(".dirindex");
775             return p;
776         }
777
778         simgear::sha1nfo hashContext;
779         std::string body;
780         bool _isRootDir; ///< is this the repository root?
781         std::string _targetHash;
782     };
783
784     HTTPRepoPrivate::~HTTPRepoPrivate()
785     {
786         DirectoryVector::iterator it;
787         for (it=directories.begin(); it != directories.end(); ++it) {
788             delete *it;
789         }
790
791         RequestVector::iterator r;
792         for (r=activeRequests.begin(); r != activeRequests.end(); ++r) {
793             (*r)->cancel();
794         }
795     }
796
797     HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name, size_t sz)
798     {
799         RepoRequestPtr r(new FileGetRequest(dir, name));
800         r->setContentSize(sz);
801         makeRequest(r);
802         return r;
803     }
804
805     HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash, size_t sz)
806     {
807         RepoRequestPtr r(new DirGetRequest(dir, hash));
808         r->setContentSize(sz);
809         makeRequest(r);
810         return r;
811     }
812
813
814     class HashEntryWithPath
815     {
816     public:
817         HashEntryWithPath(const std::string& p) : path(p) {}
818         bool operator()(const HTTPRepoPrivate::HashCacheEntry& entry) const
819         { return entry.filePath == path; }
820     private:
821         std::string path;
822     };
823
824     std::string HTTPRepoPrivate::hashForPath(const SGPath& p)
825     {
826         HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
827         if (it != hashes.end()) {
828             // ensure data on disk hasn't changed.
829             // we could also use the file type here if we were paranoid
830             if ((p.sizeInBytes() == it->lengthBytes) && (p.modTime() == it->modTime)) {
831                 return it->hashHex;
832             }
833
834             // entry in the cache, but it's stale so remove and fall through
835             hashes.erase(it);
836         }
837
838         std::string hash = computeHashForPath(p);
839         updatedFileContents(p, hash);
840         return hash;
841     }
842
843     std::string HTTPRepoPrivate::computeHashForPath(const SGPath& p)
844     {
845         if (!p.exists())
846             return std::string();
847         sha1nfo info;
848         sha1_init(&info);
849         char* buf = static_cast<char*>(malloc(1024 * 1024));
850         size_t readLen;
851         SGFile f(p.str());
852         if (!f.open(SG_IO_IN)) {
853             throw sg_io_exception("Couldn't open file for compute hash", p);
854         }
855         while ((readLen = f.read(buf, 1024 * 1024)) > 0) {
856             sha1_write(&info, buf, readLen);
857         }
858
859         f.close();
860         free(buf);
861         std::string hashBytes((char*) sha1_result(&info), HASH_LENGTH);
862         return strutils::encodeHex(hashBytes);
863     }
864
865     void HTTPRepoPrivate::updatedFileContents(const SGPath& p, const std::string& newHash)
866     {
867         // remove the existing entry
868         HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
869         if (it != hashes.end()) {
870             hashes.erase(it);
871             hashCacheDirty = true;
872         }
873
874         if (newHash.empty()) {
875             return; // we're done
876         }
877
878         // use a cloned SGPath and reset its caching to force one stat() call
879         SGPath p2(p);
880         p2.set_cached(false);
881         p2.set_cached(true);
882
883         HashCacheEntry entry;
884         entry.filePath = p.str();
885         entry.hashHex = newHash;
886         entry.modTime = p2.modTime();
887         entry.lengthBytes = p2.sizeInBytes();
888         hashes.push_back(entry);
889
890         hashCacheDirty = true;
891     }
892
893     void HTTPRepoPrivate::writeHashCache()
894     {
895         if (!hashCacheDirty) {
896             return;
897         }
898
899         SGPath cachePath = basePath;
900         cachePath.append(".hashes");
901
902         std::ofstream stream(cachePath.c_str(),std::ios::out | std::ios::trunc);
903         HashCache::const_iterator it;
904         for (it = hashes.begin(); it != hashes.end(); ++it) {
905             stream << it->filePath << ":" << it->modTime << ":"
906             << it->lengthBytes << ":" << it->hashHex << "\n";
907         }
908         stream.close();
909         hashCacheDirty = false;
910     }
911
912     void HTTPRepoPrivate::parseHashCache()
913     {
914         hashes.clear();
915         SGPath cachePath = basePath;
916         cachePath.append(".hashes");
917         if (!cachePath.exists()) {
918             return;
919         }
920
921         std::ifstream stream(cachePath.c_str(), std::ios::in);
922
923         while (!stream.eof()) {
924             std::string line;
925             std::getline(stream,line);
926             line = simgear::strutils::strip(line);
927             if( line.empty() || line[0] == '#' )
928                 continue;
929
930             string_list tokens = simgear::strutils::split( line, ":" );
931             if( tokens.size() < 4 ) {
932                 SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
933                 continue;
934             }
935             const std::string nameData = simgear::strutils::strip(tokens[0]);
936             const std::string timeData = simgear::strutils::strip(tokens[1]);
937             const std::string sizeData = simgear::strutils::strip(tokens[2]);
938             const std::string hashData = simgear::strutils::strip(tokens[3]);
939
940             if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty() ) {
941                 SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
942                 continue;
943             }
944
945             HashCacheEntry entry;
946             entry.filePath = nameData;
947             entry.hashHex = hashData;
948             entry.modTime = strtol(timeData.c_str(), NULL, 10);
949             entry.lengthBytes = strtol(sizeData.c_str(), NULL, 10);
950             hashes.push_back(entry);
951         }
952     }
953
954     class DirectoryWithPath
955     {
956     public:
957         DirectoryWithPath(const std::string& p) : path(p) {}
958         bool operator()(const HTTPDirectory* entry) const
959         { return entry->relativePath().str() == path; }
960     private:
961         std::string path;
962     };
963
964     HTTPDirectory* HTTPRepoPrivate::getOrCreateDirectory(const std::string& path)
965     {
966         DirectoryWithPath p(path);
967         DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
968         if (it != directories.end()) {
969             return *it;
970         }
971
972         HTTPDirectory* d = new HTTPDirectory(this, path);
973         directories.push_back(d);
974         return d;
975     }
976
977     bool HTTPRepoPrivate::deleteDirectory(const std::string& path)
978     {
979         DirectoryWithPath p(path);
980         DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
981         if (it != directories.end()) {
982             HTTPDirectory* d = *it;
983             directories.erase(it);
984             Dir dir(d->absolutePath());
985             bool result = dir.remove(true);
986             delete d;
987
988             // update the hash cache too
989             updatedFileContents(path, std::string());
990
991             return result;
992         }
993
994         return false;
995     }
996
997     void HTTPRepoPrivate::makeRequest(RepoRequestPtr req)
998     {
999         if (activeRequests.size() > 4) {
1000             queuedRequests.push_back(req);
1001         } else {
1002             activeRequests.push_back(req);
1003             http->makeRequest(req);
1004         }
1005     }
1006
1007     void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req)
1008     {
1009         RequestVector::iterator it = std::find(activeRequests.begin(), activeRequests.end(), req);
1010         if (it == activeRequests.end()) {
1011             throw sg_exception("lost request somehow", req->url());
1012         }
1013         activeRequests.erase(it);
1014
1015         if (!queuedRequests.empty()) {
1016             RepoRequestPtr rr = queuedRequests.front();
1017             queuedRequests.erase(queuedRequests.begin());
1018             activeRequests.push_back(rr);
1019             http->makeRequest(rr);
1020         }
1021
1022         writeHashCache();
1023
1024         if (activeRequests.empty() && queuedRequests.empty()) {
1025             isUpdating = false;
1026         }
1027     }
1028
1029     void HTTPRepoPrivate::failedToGetRootIndex(AbstractRepository::ResultCode st)
1030     {
1031         SG_LOG(SG_TERRASYNC, SG_WARN, "Failed to get root of repo:" << baseUrl);
1032         status = st;
1033     }
1034
1035     void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath,
1036                                               AbstractRepository::ResultCode fileStatus)
1037     {
1038         Failure f;
1039         f.path = relativePath;
1040         f.error = fileStatus;
1041         failures.push_back(f);
1042
1043         SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " code:" << fileStatus);
1044     }
1045
1046
1047
1048 } // of namespace simgear