]> git.mxchange.org Git - simgear.git/blobdiff - simgear/io/HTTPRepository.cxx
HTTP: Always use absolute paths for hashes
[simgear.git] / simgear / io / HTTPRepository.cxx
index 477232eacbe55bb6c7f12c0d1503722c9557105c..05635dac4c62ac15c5710d49aebd53c408fa4f4c 100644 (file)
@@ -21,7 +21,6 @@
 #include <simgear_config.h>
 
 #include <iostream>
-#include <cstring>
 #include <cassert>
 #include <algorithm>
 #include <sstream>
 #include <simgear/io/sg_file.hxx>
 #include <simgear/misc/sgstream.hxx>
 #include <simgear/structure/exception.hxx>
+#include <simgear/timing/timestamp.hxx>
 
 #include <simgear/misc/sg_hash.hxx>
 
-#if defined(SG_WINDOWS)
-
-/*
- * public domain strtok_r() by Charlie Gordon
- *
- *   from comp.lang.c  9/14/2007
- *
- *      http://groups.google.com/group/comp.lang.c/msg/2ab1ecbb86646684
- *
- *     (Declaration that it's public domain):
- *      http://groups.google.com/group/comp.lang.c/msg/7c7b39328fefab9c
- */
-
-char* strtok_r(
-    char *str,
-    const char *delim,
-    char **nextp)
-{
-    char *ret;
-
-    if (str == NULL)
-    {
-        str = *nextp;
-    }
-
-    str += strspn(str, delim);
-
-    if (*str == '\0')
-    {
-        return NULL;
-    }
-
-    ret = str;
-
-    str += strcspn(str, delim);
-
-    if (*str)
-    {
-        *str++ = '\0';
-    }
-
-    *nextp = str;
-
-    return ret;
-}
-#endif
-
 namespace simgear
 {
 
@@ -104,13 +57,18 @@ namespace simgear
         {
         }
 
-        virtual void cancel()
+        size_t contentSize() const
         {
-            _directory = 0;
-            abort("Repository cancelled request");
+            return _contentSize;
+        }
+
+        void setContentSize(size_t sz)
+        {
+            _contentSize = sz;
         }
     protected:
         HTTPDirectory* _directory;
+        size_t _contentSize;
     };
 
     typedef SGSharedPtr<HTTPRepoGetRequest> RepoRequestPtr;
@@ -129,20 +87,24 @@ public:
 
     typedef std::vector<HashCacheEntry> HashCache;
     HashCache hashes;
+    bool hashCacheDirty;
 
     struct Failure
     {
         SGPath path;
-        AbstractRepository::ResultCode error;
+        HTTPRepository::ResultCode error;
     };
 
     typedef std::vector<Failure> FailureList;
     FailureList failures;
 
     HTTPRepoPrivate(HTTPRepository* parent) :
-    p(parent),
-    isUpdating(false),
-    status(AbstractRepository::REPO_NO_ERROR)
+        hashCacheDirty(false),
+        p(parent),
+        isUpdating(false),
+        updateEverything(false),
+        status(HTTPRepository::REPO_NO_ERROR),
+        totalDownloaded(0)
     { ; }
 
     ~HTTPRepoPrivate();
@@ -152,11 +114,18 @@ public:
     std::string baseUrl;
     SGPath basePath;
     bool isUpdating;
-    AbstractRepository::ResultCode status;
+    bool updateEverything;
+    string_list updatePaths;
+    HTTPRepository::ResultCode status;
     HTTPDirectory* rootDir;
+    size_t totalDownloaded;
+
+    void updateWaiting();
 
-    HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name);
-    HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash);
+    HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name,
+                                 size_t sz);
+    HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash,
+                                size_t sz);
 
     std::string hashForPath(const SGPath& p);
     void updatedFileContents(const SGPath& p, const std::string& newHash);
@@ -164,13 +133,15 @@ public:
     std::string computeHashForPath(const SGPath& p);
     void writeHashCache();
 
-    void failedToGetRootIndex(AbstractRepository::ResultCode st);
+    void failedToGetRootIndex(HTTPRepository::ResultCode st);
     void failedToUpdateChild(const SGPath& relativePath,
-                             AbstractRepository::ResultCode fileStatus);
+                             HTTPRepository::ResultCode fileStatus);
 
     typedef std::vector<RepoRequestPtr> RequestVector;
-    RequestVector requests;
+    RequestVector queuedRequests,
+        activeRequests;
 
+    void makeRequest(RepoRequestPtr req);
     void finishedRequest(const RepoRequestPtr& req);
 
     HTTPDirectory* getOrCreateDirectory(const std::string& path);
@@ -191,10 +162,10 @@ class HTTPDirectory
             DirectoryType
         };
 
-        ChildInfo(Type ty, const char* nameData, const char* hashData) :
+        ChildInfo(Type ty, const std::string & nameData, const std::string & hashData) :
             type(ty),
             name(nameData),
-            hash(hashData ? hashData : ""),
+            hash(hashData),
             sizeInBytes(0)
         {
         }
@@ -206,9 +177,9 @@ class HTTPDirectory
             sizeInBytes(other.sizeInBytes)
         { }
 
-        void setSize(const char* sizeData)
+        void setSize(const std::string & sizeData)
         {
-            sizeInBytes = ::strtol(sizeData, NULL, 10);
+            sizeInBytes = ::strtol(sizeData.c_str(), NULL, 10);
         }
 
         bool operator<(const ChildInfo& other) const
@@ -224,10 +195,12 @@ class HTTPDirectory
     typedef std::vector<ChildInfo> ChildInfoList;
     ChildInfoList children;
 
+
 public:
     HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
         _repository(repo),
-        _relativePath(path)
+        _relativePath(path),
+        _state(DoNotUpdate)
   {
       assert(repo);
 
@@ -235,9 +208,9 @@ public:
       if (p.exists()) {
           try {
               // already exists on disk
-              bool ok = parseDirIndex(children);
+              parseDirIndex(children);
               std::sort(children.begin(), children.end());
-          } catch (sg_exception& e) {
+          } catch (sg_exception& ) {
               // parsing cache failed
               children.clear();
           }
@@ -260,17 +233,20 @@ public:
 
     void dirIndexUpdated(const std::string& hash)
     {
-        SGPath fpath(_relativePath);
+        SGPath fpath(absolutePath());
         fpath.append(".dirindex");
         _repository->updatedFileContents(fpath, hash);
 
+        _state = Updated;
+
         children.clear();
         parseDirIndex(children);
         std::sort(children.begin(), children.end());
     }
 
-    void failedToUpdate(AbstractRepository::ResultCode status)
+    void failedToUpdate(HTTPRepository::ResultCode status)
     {
+        _state = UpdateFailed;
         if (_relativePath.isNull()) {
             // root dir failed
             _repository->failedToGetRootIndex(status);
@@ -281,7 +257,11 @@ public:
 
     void updateChildrenBasedOnHash()
     {
-        //SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
+        // if we got here for a dir which is still updating or excluded
+        // from updates, just bail out right now.
+        if (_state != Updated) {
+            return;
+        }
 
         string_list indexNames = indexChildren(),
             toBeUpdated, orphans;
@@ -289,39 +269,44 @@ public:
         PathList fsChildren = d.children(0);
         PathList::const_iterator it = fsChildren.begin();
 
+
         for (; it != fsChildren.end(); ++it) {
             ChildInfo info(it->isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
-                           it->file().c_str(), NULL);
+                           it->file(), "");
             std::string hash = hashForChild(info);
 
             ChildInfoList::iterator c = findIndexChild(it->file());
             if (c == children.end()) {
+                SG_LOG(SG_TERRASYNC, SG_DEBUG, "is orphan '" << it->file() << "'" );
                 orphans.push_back(it->file());
             } else if (c->hash != hash) {
+                SG_LOG(SG_TERRASYNC, SG_DEBUG, "hash mismatch'" << it->file() );
                 // file exists, but hash mismatch, schedule update
                 if (!hash.empty()) {
-                    //SG_LOG(SG_TERRASYNC, SG_INFO, "file exists but hash is wrong for:" << c->name);
-                    //SG_LOG(SG_TERRASYNC, SG_INFO, "on disk:" << hash << " vs in info:" << c->hash);
+                    SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists but hash is wrong for:" << it->file() );
+                    SG_LOG(SG_TERRASYNC, SG_DEBUG, "on disk:" << hash << " vs in info:" << c->hash);
                 }
 
-                toBeUpdated.push_back(c->name);
+                toBeUpdated.push_back(it->file() );
             } else {
                 // file exists and hash is valid. If it's a directory,
                 // perform a recursive check.
+                SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists hash is good:" << it->file() );
                 if (c->type == ChildInfo::DirectoryType) {
                     SGPath p(relativePath());
-                    p.append(c->name);
+                    p.append(it->file());
                     HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                    if (childDir->_state == NotUpdated) {
+                        childDir->_state = Updated;
+                    }
                     childDir->updateChildrenBasedOnHash();
                 }
             }
 
             // remove existing file system children from the index list,
             // so we can detect new children
-            string_list::iterator it = std::find(indexNames.begin(), indexNames.end(), c->name);
-            if (it != indexNames.end()) {
-                indexNames.erase(it);
-            }
+            // https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Erase-Remove
+            indexNames.erase(std::remove(indexNames.begin(), indexNames.end(), it->file()), indexNames.end());
         } // of real children iteration
 
         // all remaining names in indexChilden are new children
@@ -331,6 +316,101 @@ public:
         scheduleUpdates(toBeUpdated);
     }
 
+    void markAsUpToDate()
+    {
+        _state = Updated;
+    }
+
+    void markAsUpdating()
+    {
+        assert(_state == NotUpdated);
+        _state = HTTPDirectory::UpdateInProgress;
+    }
+
+    void markAsEnabled()
+    {
+        // assert because this should only get invoked on newly created
+        // directory objects which are inside the sub-tree(s) to be updated
+        assert(_state == DoNotUpdate);
+        _state = NotUpdated;
+    }
+
+    void markSubtreeAsNeedingUpdate()
+    {
+        if (_state == Updated) {
+            _state = NotUpdated; // reset back to not-updated
+        }
+
+        ChildInfoList::iterator cit;
+        for (cit = children.begin(); cit != children.end(); ++cit) {
+            if (cit->type == ChildInfo::DirectoryType) {
+                SGPath p(relativePath());
+                p.append(cit->name);
+                HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                childDir->markSubtreeAsNeedingUpdate();
+            }
+        } // of child iteration
+    }
+
+    void markSubtreeAsEnabled()
+    {
+        if (_state == DoNotUpdate) {
+            markAsEnabled();
+        }
+
+        ChildInfoList::iterator cit;
+        for (cit = children.begin(); cit != children.end(); ++cit) {
+            if (cit->type == ChildInfo::DirectoryType) {
+                SGPath p(relativePath());
+                p.append(cit->name);
+                HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                childDir->markSubtreeAsEnabled();
+            }
+        } // of child iteration
+    }
+
+
+    void markAncestorChainAsEnabled()
+    {
+        if (_state == DoNotUpdate) {
+            markAsEnabled();
+        }
+
+        if (_relativePath.isNull()) {
+            return;
+        }
+
+        std::string prPath = _relativePath.dir();
+        if (prPath.empty()) {
+            _repository->rootDir->markAncestorChainAsEnabled();
+        } else {
+            HTTPDirectory* prDir = _repository->getOrCreateDirectory(prPath);
+            prDir->markAncestorChainAsEnabled();
+        }
+    }
+
+    void updateIfWaiting(const std::string& hash, size_t sz)
+    {
+        if (_state == NotUpdated) {
+            _repository->updateDir(this, hash, sz);
+            return;
+        }
+
+        if ((_state == DoNotUpdate) || (_state == UpdateInProgress)) {
+            return;
+        }
+
+        ChildInfoList::iterator cit;
+        for (cit = children.begin(); cit != children.end(); ++cit) {
+            if (cit->type == ChildInfo::DirectoryType) {
+                SGPath p(relativePath());
+                p.append(cit->name);
+                HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                childDir->updateIfWaiting(cit->hash, cit->sizeInBytes);
+            }
+        } // of child iteration
+    }
+
     void removeOrphans(const string_list& orphans)
     {
         string_list::const_iterator it;
@@ -360,13 +440,19 @@ public:
                 continue;
             }
 
+            SG_LOG(SG_TERRASYNC,SG_DEBUG, "scheduling update for " << *it );
             if (cit->type == ChildInfo::FileType) {
-                _repository->updateFile(this, *it);
+                _repository->updateFile(this, *it, cit->sizeInBytes);
             } else {
                 SGPath p(relativePath());
                 p.append(*it);
                 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
-                _repository->updateDir(childDir, cit->hash);
+                if (childDir->_state == DoNotUpdate) {
+                    SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, child:" << *it << " is marked do not update so skipping");
+                    continue;
+                }
+
+                _repository->updateDir(childDir, cit->hash, cit->sizeInBytes);
             }
         }
     }
@@ -383,27 +469,29 @@ public:
         return _relativePath;
     }
 
-    void didUpdateFile(const std::string& file, const std::string& hash)
+    void didUpdateFile(const std::string& file, const std::string& hash, size_t sz)
     {
         // check hash matches what we expected
         ChildInfoList::iterator it = findIndexChild(file);
         if (it == children.end()) {
             SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
         } else {
-            SGPath fpath(_relativePath);
+            SGPath fpath(absolutePath());
             fpath.append(file);
 
             if (it->hash != hash) {
-                _repository->failedToUpdateChild(_relativePath, AbstractRepository::REPO_ERROR_CHECKSUM);
+                // we don't erase the file on a hash mismatch, because if we're syncing during the
+                // middle of a server-side update, the downloaded file may actually become valid.
+                _repository->failedToUpdateChild(_relativePath, HTTPRepository::REPO_ERROR_CHECKSUM);
             } else {
                 _repository->updatedFileContents(fpath, hash);
-                //SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath);
+                _repository->totalDownloaded += sz;
             } // of hash matches
         } // of found in child list
     }
 
     void didFailToUpdateFile(const std::string& file,
-                             AbstractRepository::ResultCode status)
+                             HTTPRepository::ResultCode status)
     {
         SGPath fpath(_relativePath);
         fpath.append(file);
@@ -439,42 +527,48 @@ private:
             throw sg_io_exception("cannot open dirIndex file", p);
         }
 
-        char lineBuffer[512];
-        char* lastToken;
-
         while (!indexStream.eof() ) {
-            indexStream.getline(lineBuffer, 512);
-            lastToken = 0;
-            char* typeData = ::strtok_r(lineBuffer, ":", &lastToken);
-            if (!typeData) {
-                continue; // skip blank line
+            std::string line;
+            std::getline( indexStream, line );
+            line = simgear::strutils::strip(line);
+
+            // skip blank line or comment beginning with '#'
+            if( line.empty() || line[0] == '#' )
+                continue;
+
+            string_list tokens = simgear::strutils::split( line, ":" );
+
+            std::string typeData = tokens[0];
+
+            if( typeData == "version" ) {
+                if( tokens.size() < 2 ) {
+                    SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: missing version number in line '" << line << "'" );
+                    break;
+                }
+                if( tokens[1] != "1" ) {
+                    SG_LOG(SG_TERRASYNC, SG_WARN, "invalid .dirindex file: wrong version number '" << tokens[1] << "' (expected 1)" );
+                    break;
+                }
+                continue; // version is good, continue
             }
 
-            if (!typeData) {
-                // malformed entry
-                throw sg_io_exception("Malformed dir index file", p);
+            if( typeData == "path" ) {
+                continue; // ignore path, next line
             }
 
-            if (!strcmp(typeData, "version")) {
-                continue;
-            } else if (!strcmp(typeData, "path")) {
+            if( tokens.size() < 3 ) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: not enough tokens in line '" << line << "' (ignoring line)" );
                 continue;
             }
 
-            char* nameData = ::strtok_r(NULL, ":", &lastToken);
-            char* hashData = ::strtok_r(NULL, ":", &lastToken);
-            char* sizeData = ::strtok_r(NULL, ":", &lastToken);
-
-            if (typeData[0] == 'f') {
-                children.push_back(ChildInfo(ChildInfo::FileType, nameData, hashData));
-            } else if (typeData[0] == 'd') {
-                children.push_back(ChildInfo(ChildInfo::DirectoryType, nameData, hashData));
-            } else {
-                throw sg_io_exception("Malformed line code in dir index file", p);
+            if (typeData != "f" && typeData != "d" ) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 'd' or 'f', (ignoring line)" );
+                continue;
             }
+            children.push_back(ChildInfo(typeData == "f" ? ChildInfo::FileType : ChildInfo::DirectoryType, tokens[1], tokens[2]));
 
-            if (sizeData) {
-                children.back().setSize(sizeData);
+            if (tokens.size() > 3) {
+                children.back().setSize(tokens[3]);
             }
         }
 
@@ -494,7 +588,7 @@ private:
             ok = _repository->deleteDirectory(fpath.str());
         } else {
             // remove the hash cache entry
-            _repository->updatedFileContents(fpath, std::string());
+            _repository->updatedFileContents(p, std::string());
             ok = p.remove();
         }
 
@@ -517,7 +611,16 @@ private:
   HTTPRepoPrivate* _repository;
   SGPath _relativePath; // in URL and file-system space
 
-
+    typedef enum
+    {
+        NotUpdated,
+        UpdateInProgress,
+        Updated,
+        UpdateFailed,
+        DoNotUpdate
+    } State;
+
+    State _state;
 };
 
 HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
@@ -526,6 +629,7 @@ HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
     _d->http = cl;
     _d->basePath = base;
     _d->rootDir = new HTTPDirectory(_d.get(), "");
+    _d->parseHashCache();
 }
 
 HTTPRepository::~HTTPRepository()
@@ -554,14 +658,38 @@ SGPath HTTPRepository::fsBase() const
 
 void HTTPRepository::update()
 {
-    if (_d->isUpdating) {
+    _d->rootDir->markSubtreeAsNeedingUpdate();
+    _d->updateWaiting();
+}
+
+void HTTPRepository::setEntireRepositoryMode()
+{
+    if (!_d->updateEverything) {
+        // this is a one-way decision
+        _d->updateEverything = true;
+    }
+
+    // probably overkill but not expensive so let's check everything
+    // we have in case someone did something funky and switched from partial
+    // to 'whole repo' updating.
+    _d->rootDir->markSubtreeAsEnabled();
+}
+
+
+void HTTPRepository::addSubpath(const std::string& relPath)
+{
+    if (_d->updateEverything) {
+        SG_LOG(SG_TERRASYNC, SG_WARN, "called HTTPRepository::addSubpath but updating everything");
         return;
     }
 
-    _d->status = REPO_NO_ERROR;
-    _d->isUpdating = true;
-    _d->failures.clear();
-    _d->updateDir(_d->rootDir, std::string());
+    _d->updatePaths.push_back(relPath);
+
+    HTTPDirectory* dir = _d->getOrCreateDirectory(relPath);
+    dir->markSubtreeAsEnabled();
+    dir->markAncestorChainAsEnabled();
+
+    _d->updateWaiting();
 }
 
 bool HTTPRepository::isDoingSync() const
@@ -573,7 +701,35 @@ bool HTTPRepository::isDoingSync() const
     return _d->isUpdating;
 }
 
-AbstractRepository::ResultCode
+size_t HTTPRepository::bytesToDownload() const
+{
+    size_t result = 0;
+
+    HTTPRepoPrivate::RequestVector::const_iterator r;
+    for (r = _d->queuedRequests.begin(); r != _d->queuedRequests.end(); ++r) {
+        result += (*r)->contentSize();
+    }
+
+    for (r = _d->activeRequests.begin(); r != _d->activeRequests.end(); ++r) {
+        result += (*r)->contentSize() - (*r)->responseBytesReceived();
+    }
+
+    return result;
+}
+
+size_t HTTPRepository::bytesDownloaded() const
+{
+    size_t result = _d->totalDownloaded;
+
+    HTTPRepoPrivate::RequestVector::const_iterator r;
+    for (r = _d->activeRequests.begin(); r != _d->activeRequests.end(); ++r) {
+        result += (*r)->responseBytesReceived();
+    }
+
+    return result;
+}
+
+HTTPRepository::ResultCode
 HTTPRepository::failure() const
 {
     if ((_d->status == REPO_NO_ERROR) && !_d->failures.empty()) {
@@ -592,17 +748,16 @@ HTTPRepository::failure() const
         {
             pathInRepo = _directory->absolutePath();
             pathInRepo.append(fileName);
-            //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url());
         }
 
     protected:
         virtual void gotBodyData(const char* s, int n)
         {
             if (!file.get()) {
-                file.reset(new SGFile(pathInRepo.str()));
+                file.reset(new SGBinaryFile(pathInRepo.str()));
                 if (!file->open(SG_IO_OUT)) {
                   SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo);
-                  abort("Unable to create output file");
+                  _directory->repository()->http->cancelRequest(this, "Unable to create output file");
                 }
 
                 sha1_init(&hashContext);
@@ -615,14 +770,17 @@ HTTPRepository::failure() const
         virtual void onDone()
         {
             file->close();
+
             if (responseCode() == 200) {
                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
-                _directory->didUpdateFile(fileName, hash);
-                //SG_LOG(SG_TERRASYNC, SG_INFO, "got file " << fileName << " in " << _directory->absolutePath());
+                _directory->didUpdateFile(fileName, hash, contentSize());
+                SG_LOG(SG_TERRASYNC, SG_DEBUG, "got file " << fileName << " in " << _directory->absolutePath());
             } else if (responseCode() == 404) {
-                _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
+                SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file not found on server: " << fileName << " for " << _directory->absolutePath());
+                _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
             } else {
-                _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_HTTP);
+                SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file download error on server: " << fileName << " for " << _directory->absolutePath() << ": " << responseCode() );
+                _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_HTTP);
             }
 
             _directory->repository()->finishedRequest(this);
@@ -631,9 +789,12 @@ HTTPRepository::failure() const
         virtual void onFail()
         {
             file.reset();
-            pathInRepo.remove();
+            if (pathInRepo.exists()) {
+                pathInRepo.remove();
+            }
+
             if (_directory) {
-                _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_SOCKET);
+                _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_SOCKET);
                 _directory->repository()->finishedRequest(this);
             }
         }
@@ -646,7 +807,7 @@ HTTPRepository::failure() const
         std::string fileName; // if empty, we're getting the directory itself
         SGPath pathInRepo;
         simgear::sha1nfo hashContext;
-        std::auto_ptr<SGFile> file;
+        std::auto_ptr<SGBinaryFile> file;
     };
 
     class DirGetRequest : public HTTPRepoGetRequest
@@ -658,7 +819,6 @@ HTTPRepository::failure() const
             _targetHash(targetHash)
         {
             sha1_init(&hashContext);
-           //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url());
         }
 
         void setIsRootDir()
@@ -683,7 +843,7 @@ HTTPRepository::failure() const
             if (responseCode() == 200) {
                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
                 if (!_targetHash.empty() && (hash != _targetHash)) {
-                    _directory->failedToUpdate(AbstractRepository::REPO_ERROR_CHECKSUM);
+                    _directory->failedToUpdate(HTTPRepository::REPO_ERROR_CHECKSUM);
                     _directory->repository()->finishedRequest(this);
                     return;
                 }
@@ -707,21 +867,26 @@ HTTPRepository::failure() const
                     of.write(body.data(), body.size());
                     of.close();
                     _directory->dirIndexUpdated(hash);
-
-                    //SG_LOG(SG_TERRASYNC, SG_INFO, "updated dir index " << _directory->absolutePath());
+                } else {
+                    _directory->markAsUpToDate();
                 }
 
+                _directory->repository()->totalDownloaded += contentSize();
+
                 try {
                     // either way we've confirmed the index is valid so update
                     // children now
+                    SGTimeStamp st;
+                    st.stamp();
                     _directory->updateChildrenBasedOnHash();
-                } catch (sg_exception& e) {
-                    _directory->failedToUpdate(AbstractRepository::REPO_ERROR_IO);
+                    SG_LOG(SG_TERRASYNC, SG_INFO, "after update of:" << _directory->absolutePath() << " child update took:" << st.elapsedMSec());
+                } catch (sg_exception& ) {
+                    _directory->failedToUpdate(HTTPRepository::REPO_ERROR_IO);
                 }
             } else if (responseCode() == 404) {
-                _directory->failedToUpdate(AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
+                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
             } else {
-                _directory->failedToUpdate(AbstractRepository::REPO_ERROR_HTTP);
+                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_HTTP);
             }
 
             _directory->repository()->finishedRequest(this);
@@ -730,7 +895,7 @@ HTTPRepository::failure() const
         virtual void onFail()
         {
             if (_directory) {
-                _directory->failedToUpdate(AbstractRepository::REPO_ERROR_SOCKET);
+                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_SOCKET);
                 _directory->repository()->finishedRequest(this);
             }
         }
@@ -755,30 +920,34 @@ HTTPRepository::failure() const
 
     HTTPRepoPrivate::~HTTPRepoPrivate()
     {
+        // take a copy since cancelRequest will fail each request and hence
+        // remove it from activeRequests, invalidating any iterator to it.
+        RequestVector copyOfActive(activeRequests);
+        RequestVector::iterator rq;
+        for (rq = copyOfActive.begin(); rq != copyOfActive.end(); ++rq) {
+            http->cancelRequest(*rq, "Repository object deleted");
+        }
+
         DirectoryVector::iterator it;
         for (it=directories.begin(); it != directories.end(); ++it) {
             delete *it;
         }
-
-        RequestVector::iterator r;
-        for (r=requests.begin(); r != requests.end(); ++r) {
-            (*r)->cancel();
-        }
     }
 
-    HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name)
+    HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name, size_t sz)
     {
         RepoRequestPtr r(new FileGetRequest(dir, name));
-        requests.push_back(r);
-        http->makeRequest(r);
+        r->setContentSize(sz);
+        makeRequest(r);
         return r;
     }
 
-    HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash)
+    HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash, size_t sz)
     {
+        dir->markAsUpdating();
         RepoRequestPtr r(new DirGetRequest(dir, hash));
-        requests.push_back(r);
-        http->makeRequest(r);
+        r->setContentSize(sz);
+        makeRequest(r);
         return r;
     }
 
@@ -820,7 +989,7 @@ HTTPRepository::failure() const
         sha1_init(&info);
         char* buf = static_cast<char*>(malloc(1024 * 1024));
         size_t readLen;
-        SGFile f(p.str());
+        SGBinaryFile f(p.str());
         if (!f.open(SG_IO_IN)) {
             throw sg_io_exception("Couldn't open file for compute hash", p);
         }
@@ -840,6 +1009,7 @@ HTTPRepository::failure() const
         HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
         if (it != hashes.end()) {
             hashes.erase(it);
+            hashCacheDirty = true;
         }
 
         if (newHash.empty()) {
@@ -858,11 +1028,15 @@ HTTPRepository::failure() const
         entry.lengthBytes = p2.sizeInBytes();
         hashes.push_back(entry);
 
-        writeHashCache();
+        hashCacheDirty = true;
     }
 
     void HTTPRepoPrivate::writeHashCache()
     {
+        if (!hashCacheDirty) {
+            return;
+        }
+
         SGPath cachePath = basePath;
         cachePath.append(".hashes");
 
@@ -873,6 +1047,7 @@ HTTPRepository::failure() const
             << it->lengthBytes << ":" << it->hashHex << "\n";
         }
         stream.close();
+        hashCacheDirty = false;
     }
 
     void HTTPRepoPrivate::parseHashCache()
@@ -885,25 +1060,34 @@ HTTPRepository::failure() const
         }
 
         std::ifstream stream(cachePath.c_str(), std::ios::in);
-        char buf[2048];
-        char* lastToken;
 
         while (!stream.eof()) {
-            stream.getline(buf, 2048);
-            lastToken = 0;
-            char* nameData = ::strtok_r(buf, ":", &lastToken);
-            char* timeData = ::strtok_r(NULL, ":", &lastToken);
-            char* sizeData = ::strtok_r(NULL, ":", &lastToken);
-            char* hashData = ::strtok_r(NULL, ":", &lastToken);
-            if (!nameData || !timeData || !sizeData || !hashData) {
+            std::string line;
+            std::getline(stream,line);
+            line = simgear::strutils::strip(line);
+            if( line.empty() || line[0] == '#' )
+                continue;
+
+            string_list tokens = simgear::strutils::split( line, ":" );
+            if( tokens.size() < 4 ) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
+                continue;
+            }
+            const std::string nameData = simgear::strutils::strip(tokens[0]);
+            const std::string timeData = simgear::strutils::strip(tokens[1]);
+            const std::string sizeData = simgear::strutils::strip(tokens[2]);
+            const std::string hashData = simgear::strutils::strip(tokens[3]);
+
+            if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty() ) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
                 continue;
             }
 
             HashCacheEntry entry;
             entry.filePath = nameData;
             entry.hashHex = hashData;
-            entry.modTime = strtol(timeData, NULL, 10);
-            entry.lengthBytes = strtol(sizeData, NULL, 10);
+            entry.modTime = strtol(timeData.c_str(), NULL, 10);
+            entry.lengthBytes = strtol(sizeData.c_str(), NULL, 10);
             hashes.push_back(entry);
         }
     }
@@ -928,6 +1112,25 @@ HTTPRepository::failure() const
 
         HTTPDirectory* d = new HTTPDirectory(this, path);
         directories.push_back(d);
+        if (updateEverything) {
+            d->markAsEnabled();
+        } else {
+            string_list::const_iterator s;
+            bool shouldUpdate = false;
+
+            for (s = updatePaths.begin(); s != updatePaths.end(); ++s) {
+                size_t minLen = std::min(path.size(), s->size());
+                if (s->compare(0, minLen, path, 0, minLen) == 0) {
+                    shouldUpdate = true;
+                    break;
+                }
+            } // of paths iteration
+
+            if (shouldUpdate) {
+                d->markAsEnabled();
+            }
+        }
+
         return d;
     }
 
@@ -943,7 +1146,7 @@ HTTPRepository::failure() const
-            delete d;
 
             // update the hash cache too
-            updatedFileContents(path, std::string());
+            updatedFileContents(d->absolutePath(), std::string());
+            delete d; // destroy d only after its last use in the call above
 
             return result;
         }
@@ -951,27 +1154,69 @@ HTTPRepository::failure() const
         return false;
     }
 
+    void HTTPRepoPrivate::makeRequest(RepoRequestPtr req) // start req now, or queue it when too many are active
+    {
+        if (activeRequests.size() > 4) { // more than four requests already active: defer this one
+            queuedRequests.push_back(req); // finishedRequest() later moves it to activeRequests and starts it
+        } else {
+            activeRequests.push_back(req); // tracked so finishedRequest() can find and erase it
+            http->makeRequest(req);
+        }
+    }
+
     void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req)
     {
-        RequestVector::iterator it = std::find(requests.begin(), requests.end(), req);
-        if (it == requests.end()) {
-            throw sg_exception("lost request somehow", req->url());
+        RequestVector::iterator it = std::find(activeRequests.begin(), activeRequests.end(), req);
+        // in some cases, for example a checksum failure, we clear the active
+        // and queued request vectors, so the ::find above can fail
+        if (it != activeRequests.end()) { // req may legitimately be absent, so no exception is thrown
+            activeRequests.erase(it);
+        }
+
+        if (!queuedRequests.empty()) { // promote the front of the queue to an active request
+            RepoRequestPtr rr = queuedRequests.front();
+            queuedRequests.erase(queuedRequests.begin());
+            activeRequests.push_back(rr);
+            http->makeRequest(rr);
         }
-        requests.erase(it);
-        if (requests.empty()) {
+
+        writeHashCache(); // runs after every finished request, whether or not req was found
+
+        if (activeRequests.empty() && queuedRequests.empty()) { // nothing active and nothing waiting
             isUpdating = false;
         }
     }
 
-    void HTTPRepoPrivate::failedToGetRootIndex(AbstractRepository::ResultCode st)
+    void HTTPRepoPrivate::failedToGetRootIndex(HTTPRepository::ResultCode st) // logs a warning, then stores st in status
     {
         SG_LOG(SG_TERRASYNC, SG_WARN, "Failed to get root of repo:" << baseUrl);
         status = st;
     }
 
     void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath,
-                                              AbstractRepository::ResultCode fileStatus)
+                                              HTTPRepository::ResultCode fileStatus)
     {
+        if (fileStatus == HTTPRepository::REPO_ERROR_CHECKSUM) {
+            // stop updating, and mark repository as failed, because this
+            // usually indicates we need to start a fresh update from the
+            // root.
+            // (we could issue a retry here, but we leave that to higher layers)
+            status = fileStatus;
+
+            queuedRequests.clear();
+
+            RequestVector copyOfActive(activeRequests);
+            RequestVector::iterator rq;
+            for (rq = copyOfActive.begin(); rq != copyOfActive.end(); ++rq) {
+                //SG_LOG(SG_TERRASYNC, SG_DEBUG, "cancelling request for:" << (*rq)->url());
+                http->cancelRequest(*rq, "Repository updated failed");
+            }
+
+
+            SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update repository:" << baseUrl
+                   << ", possibly modified during sync");
+        }
+
         Failure f;
         f.path = relativePath;
         f.error = fileStatus;
@@ -980,6 +1225,22 @@ HTTPRepository::failure() const
         SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " code:" << fileStatus);
     }
 
+    void HTTPRepoPrivate::updateWaiting() // kick off updates for sub-trees that are waiting for one
+    {
+        if (!isUpdating) { // starting a fresh update: reset status and forget earlier failures
+            status = HTTPRepository::REPO_NO_ERROR;
+            isUpdating = true;
+            failures.clear();
+        }
+
+        // find to-be-updated sub-trees and kick them off
+        rootDir->updateIfWaiting(std::string(), 0);
 
+        // maybe there was nothing to do
+        if (activeRequests.empty()) { // no request is active after the call above
+            status = HTTPRepository::REPO_NO_ERROR;
+            isUpdating = false;
+        }
+    }
 
 } // of namespace simgear