]> git.mxchange.org Git - simgear.git/blobdiff - simgear/io/HTTPRepository.cxx
HTTP: Always use absolute paths for hashes
[simgear.git] / simgear / io / HTTPRepository.cxx
index fe9b1349a5b5e2ce4dcec5f56182e0ec7a8a5e22..05635dac4c62ac15c5710d49aebd53c408fa4f4c 100644 (file)
@@ -21,7 +21,6 @@
 #include <simgear_config.h>
 
 #include <iostream>
-#include <cstring>
 #include <cassert>
 #include <algorithm>
 #include <sstream>
 
 #include <simgear/misc/sg_hash.hxx>
 
-#if defined(SG_WINDOWS)
-
-/*
- * public domain strtok_r() by Charlie Gordon
- *
- *   from comp.lang.c  9/14/2007
- *
- *      http://groups.google.com/group/comp.lang.c/msg/2ab1ecbb86646684
- *
- *     (Declaration that it's public domain):
- *      http://groups.google.com/group/comp.lang.c/msg/7c7b39328fefab9c
- */
-
-char* strtok_r(
-    char *str,
-    const char *delim,
-    char **nextp)
-{
-    char *ret;
-
-    if (str == NULL)
-    {
-        str = *nextp;
-    }
-
-    str += strspn(str, delim);
-
-    if (*str == '\0')
-    {
-        return NULL;
-    }
-
-    ret = str;
-
-    str += strcspn(str, delim);
-
-    if (*str)
-    {
-        *str++ = '\0';
-    }
-
-    *nextp = str;
-
-    return ret;
-}
-#endif
-
 namespace simgear
 {
 
@@ -105,8 +57,6 @@ namespace simgear
         {
         }
 
-        virtual void cancel();
-
         size_t contentSize() const
         {
             return _contentSize;
@@ -142,7 +92,7 @@ public:
     struct Failure
     {
         SGPath path;
-        AbstractRepository::ResultCode error;
+        HTTPRepository::ResultCode error;
     };
 
     typedef std::vector<Failure> FailureList;
@@ -152,7 +102,8 @@ public:
         hashCacheDirty(false),
         p(parent),
         isUpdating(false),
-        status(AbstractRepository::REPO_NO_ERROR),
+        updateEverything(false),
+        status(HTTPRepository::REPO_NO_ERROR),
         totalDownloaded(0)
     { ; }
 
@@ -163,10 +114,14 @@ public:
     std::string baseUrl;
     SGPath basePath;
     bool isUpdating;
-    AbstractRepository::ResultCode status;
+    bool updateEverything;
+    string_list updatePaths;
+    HTTPRepository::ResultCode status;
     HTTPDirectory* rootDir;
     size_t totalDownloaded;
 
+    void updateWaiting();
+
     HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name,
                                  size_t sz);
     HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash,
@@ -178,9 +133,9 @@ public:
     std::string computeHashForPath(const SGPath& p);
     void writeHashCache();
 
-    void failedToGetRootIndex(AbstractRepository::ResultCode st);
+    void failedToGetRootIndex(HTTPRepository::ResultCode st);
     void failedToUpdateChild(const SGPath& relativePath,
-                             AbstractRepository::ResultCode fileStatus);
+                             HTTPRepository::ResultCode fileStatus);
 
     typedef std::vector<RepoRequestPtr> RequestVector;
     RequestVector queuedRequests,
@@ -207,10 +162,10 @@ class HTTPDirectory
             DirectoryType
         };
 
-        ChildInfo(Type ty, const char* nameData, const char* hashData) :
+        ChildInfo(Type ty, const std::string & nameData, const std::string & hashData) :
             type(ty),
             name(nameData),
-            hash(hashData ? hashData : ""),
+            hash(hashData),
             sizeInBytes(0)
         {
         }
@@ -222,9 +177,9 @@ class HTTPDirectory
             sizeInBytes(other.sizeInBytes)
         { }
 
-        void setSize(const char* sizeData)
+        void setSize(const std::string & sizeData)
         {
-            sizeInBytes = ::strtol(sizeData, NULL, 10);
+            sizeInBytes = ::strtol(sizeData.c_str(), NULL, 10);
         }
 
         bool operator<(const ChildInfo& other) const
@@ -240,10 +195,12 @@ class HTTPDirectory
     typedef std::vector<ChildInfo> ChildInfoList;
     ChildInfoList children;
 
+
 public:
     HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
         _repository(repo),
-        _relativePath(path)
+        _relativePath(path),
+        _state(DoNotUpdate)
   {
       assert(repo);
 
@@ -253,7 +210,7 @@ public:
               // already exists on disk
               parseDirIndex(children);
               std::sort(children.begin(), children.end());
-          } catch (sg_exception& e) {
+          } catch (sg_exception& ) {
               // parsing cache failed
               children.clear();
           }
@@ -276,17 +233,20 @@ public:
 
     void dirIndexUpdated(const std::string& hash)
     {
-        SGPath fpath(_relativePath);
+        SGPath fpath(absolutePath()); // the hash-cache entry for .dirindex is recorded under the absolute path
         fpath.append(".dirindex");
         _repository->updatedFileContents(fpath, hash);
 
+        _state = Updated; // updateChildrenBasedOnHash() only proceeds when the state is Updated
+
         children.clear();
         parseDirIndex(children);
         std::sort(children.begin(), children.end());
     }
 
-    void failedToUpdate(AbstractRepository::ResultCode status)
+    void failedToUpdate(HTTPRepository::ResultCode status)
     {
+        _state = UpdateFailed;
         if (_relativePath.isNull()) {
             // root dir failed
             _repository->failedToGetRootIndex(status);
@@ -297,7 +257,11 @@ public:
 
     void updateChildrenBasedOnHash()
     {
-        //SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
+        // if we got here for a dir which is still updating or excluded
+        // from updates, just bail out right now.
+        if (_state != Updated) {
+            return;
+        }
 
         string_list indexNames = indexChildren(),
             toBeUpdated, orphans;
@@ -305,39 +269,44 @@ public:
         PathList fsChildren = d.children(0);
         PathList::const_iterator it = fsChildren.begin();
 
+
         for (; it != fsChildren.end(); ++it) {
             ChildInfo info(it->isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
-                           it->file().c_str(), NULL);
+                           it->file(), "");
             std::string hash = hashForChild(info);
 
             ChildInfoList::iterator c = findIndexChild(it->file());
             if (c == children.end()) {
+                SG_LOG(SG_TERRASYNC, SG_DEBUG, "is orphan '" << it->file() << "'" );
                 orphans.push_back(it->file());
             } else if (c->hash != hash) {
+                SG_LOG(SG_TERRASYNC, SG_DEBUG, "hash mismatch'" << it->file() );
                 // file exists, but hash mismatch, schedule update
                 if (!hash.empty()) {
-                    //SG_LOG(SG_TERRASYNC, SG_INFO, "file exists but hash is wrong for:" << c->name);
-                    //SG_LOG(SG_TERRASYNC, SG_INFO, "on disk:" << hash << " vs in info:" << c->hash);
+                    SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists but hash is wrong for:" << it->file() );
+                    SG_LOG(SG_TERRASYNC, SG_DEBUG, "on disk:" << hash << " vs in info:" << c->hash);
                 }
 
-                toBeUpdated.push_back(c->name);
+                toBeUpdated.push_back(it->file() );
             } else {
                 // file exists and hash is valid. If it's a directory,
                 // perform a recursive check.
+                SG_LOG(SG_TERRASYNC, SG_DEBUG, "file exists hash is good:" << it->file() );
                 if (c->type == ChildInfo::DirectoryType) {
                     SGPath p(relativePath());
-                    p.append(c->name);
+                    p.append(it->file());
                     HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                    if (childDir->_state == NotUpdated) {
+                        childDir->_state = Updated;
+                    }
                     childDir->updateChildrenBasedOnHash();
                 }
             }
 
             // remove existing file system children from the index list,
             // so we can detect new children
-            string_list::iterator it = std::find(indexNames.begin(), indexNames.end(), c->name);
-            if (it != indexNames.end()) {
-                indexNames.erase(it);
-            }
+            // https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Erase-Remove
+            indexNames.erase(std::remove(indexNames.begin(), indexNames.end(), it->file()), indexNames.end());
         } // of real children iteration
 
         // all remaining names in indexChilden are new children
@@ -347,6 +316,101 @@ public:
         scheduleUpdates(toBeUpdated);
     }
 
+    void markAsUpToDate() // unconditionally set this directory's state to Updated
+    {
+        _state = Updated;
+    }
+
+    void markAsUpdating() // move this directory from NotUpdated to UpdateInProgress
+    {
+        assert(_state == NotUpdated); // precondition: only a NotUpdated directory may start updating
+        _state = HTTPDirectory::UpdateInProgress;
+    }
+
+    void markAsEnabled() // move this directory from DoNotUpdate to NotUpdated
+    {
+        // assert because this should only get invoked on newly created
+        // directory objects which are inside the sub-tree(s) to be updated
+        assert(_state == DoNotUpdate);
+        _state = NotUpdated; // NotUpdated is the state updateIfWaiting() acts on
+    }
+
+    void markSubtreeAsNeedingUpdate() // recursively reset Updated directories in this subtree to NotUpdated
+    {
+        if (_state == Updated) { // NOTE(review): UpdateFailed, UpdateInProgress and DoNotUpdate are left untouched - confirm that failed directories are meant to stay failed
+            _state = NotUpdated; // reset back to not-updated
+        }
+
+        ChildInfoList::iterator cit;
+        for (cit = children.begin(); cit != children.end(); ++cit) {
+            if (cit->type == ChildInfo::DirectoryType) { // file entries are skipped; only directories are recursed into
+                SGPath p(relativePath());
+                p.append(cit->name); // child path = this directory's relative path + child name
+                HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                childDir->markSubtreeAsNeedingUpdate();
+            }
+        } // of child iteration
+    }
+
+    void markSubtreeAsEnabled() // recursively enable every DoNotUpdate directory in this subtree
+    {
+        if (_state == DoNotUpdate) { // guard keeps the assert inside markAsEnabled() from firing on already-enabled directories
+            markAsEnabled();
+        }
+
+        ChildInfoList::iterator cit;
+        for (cit = children.begin(); cit != children.end(); ++cit) {
+            if (cit->type == ChildInfo::DirectoryType) { // file entries are skipped; only directories are recursed into
+                SGPath p(relativePath());
+                p.append(cit->name); // child path = this directory's relative path + child name
+                HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                childDir->markSubtreeAsEnabled();
+            }
+        } // of child iteration
+    }
+
+
+    void markAncestorChainAsEnabled() // enable this directory and then each parent, up to the root
+    {
+        if (_state == DoNotUpdate) { // guard keeps the assert inside markAsEnabled() from firing on already-enabled directories
+            markAsEnabled();
+        }
+
+        if (_relativePath.isNull()) { // a null relative path identifies the root directory: end of the chain
+            return;
+        }
+
+        std::string prPath = _relativePath.dir(); // presumably the parent directory's path - confirm SGPath::dir() semantics
+        if (prPath.empty()) { // no parent path component left: continue with the repository's rootDir
+            _repository->rootDir->markAncestorChainAsEnabled();
+        } else {
+            HTTPDirectory* prDir = _repository->getOrCreateDirectory(prPath);
+            prDir->markAncestorChainAsEnabled();
+        }
+    }
+
+    void updateIfWaiting(const std::string& hash, size_t sz) // start an update for each NotUpdated directory reachable from here
+    {
+        if (_state == NotUpdated) {
+            _repository->updateDir(this, hash, sz); // updateDir() marks this directory as updating and issues the index request
+            return;
+        }
+
+        if ((_state == DoNotUpdate) || (_state == UpdateInProgress)) { // excluded, or a request is already running: do not descend
+            return;
+        }
+
+        ChildInfoList::iterator cit; // remaining states (Updated, UpdateFailed): visit the child directories
+        for (cit = children.begin(); cit != children.end(); ++cit) {
+            if (cit->type == ChildInfo::DirectoryType) {
+                SGPath p(relativePath());
+                p.append(cit->name);
+                HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                childDir->updateIfWaiting(cit->hash, cit->sizeInBytes); // hash and size come from this directory's index entry for the child
+            }
+        } // of child iteration
+    }
+
     void removeOrphans(const string_list& orphans)
     {
         string_list::const_iterator it;
@@ -376,12 +440,18 @@ public:
                 continue;
             }
 
+            SG_LOG(SG_TERRASYNC,SG_DEBUG, "scheduling update for " << *it );
             if (cit->type == ChildInfo::FileType) {
                 _repository->updateFile(this, *it, cit->sizeInBytes);
             } else {
                 SGPath p(relativePath());
                 p.append(*it);
                 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                if (childDir->_state == DoNotUpdate) {
+                    SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, child:" << *it << " is marked do not update so skipping");
+                    continue;
+                }
+
                 _repository->updateDir(childDir, cit->hash, cit->sizeInBytes);
             }
         }
@@ -406,21 +476,22 @@ public:
         if (it == children.end()) {
             SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
         } else {
-            SGPath fpath(_relativePath);
+            SGPath fpath(absolutePath());
             fpath.append(file);
 
             if (it->hash != hash) {
-                _repository->failedToUpdateChild(_relativePath, AbstractRepository::REPO_ERROR_CHECKSUM);
+                // we don't erase the file on a hash mismatch, because if we're syncing during the
+                // middle of a server-side update, the downloaded file may actually become valid.
+                _repository->failedToUpdateChild(_relativePath, HTTPRepository::REPO_ERROR_CHECKSUM);
             } else {
                 _repository->updatedFileContents(fpath, hash);
                 _repository->totalDownloaded += sz;
-                //SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath);
             } // of hash matches
         } // of found in child list
     }
 
     void didFailToUpdateFile(const std::string& file,
-                             AbstractRepository::ResultCode status)
+                             HTTPRepository::ResultCode status)
     {
         SGPath fpath(_relativePath);
         fpath.append(file);
@@ -456,42 +527,48 @@ private:
             throw sg_io_exception("cannot open dirIndex file", p);
         }
 
-        char lineBuffer[512];
-        char* lastToken;
-
         while (!indexStream.eof() ) {
-            indexStream.getline(lineBuffer, 512);
-            lastToken = 0;
-            char* typeData = ::strtok_r(lineBuffer, ":", &lastToken);
-            if (!typeData) {
-                continue; // skip blank line
+            std::string line;
+            std::getline( indexStream, line );
+            line = simgear::strutils::strip(line);
+
+            // skip blank line or comment beginning with '#'
+            if( line.empty() || line[0] == '#' )
+                continue;
+
+            string_list tokens = simgear::strutils::split( line, ":" );
+
+            std::string typeData = tokens[0];
+
+            if( typeData == "version" ) {
+                if( tokens.size() < 2 ) {
+                    SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: missing version number in line '" << line << "'" );
+                    break;
+                }
+                if( tokens[1] != "1" ) {
+                    SG_LOG(SG_TERRASYNC, SG_WARN, "invalid .dirindex file: wrong version number '" << tokens[1] << "' (expected 1)" );
+                    break;
+                }
+                continue; // version is good, continue
             }
 
-            if (!typeData) {
-                // malformed entry
-                throw sg_io_exception("Malformed dir index file", p);
+            if( typeData == "path" ) {
+                continue; // ignore path, next line
             }
 
-            if (!strcmp(typeData, "version")) {
-                continue;
-            } else if (!strcmp(typeData, "path")) {
+            if( tokens.size() < 3 ) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: not enough tokens in line '" << line << "' (ignoring line)" );
                 continue;
             }
 
-            char* nameData = ::strtok_r(NULL, ":", &lastToken);
-            char* hashData = ::strtok_r(NULL, ":", &lastToken);
-            char* sizeData = ::strtok_r(NULL, ":", &lastToken);
-
-            if (typeData[0] == 'f') {
-                children.push_back(ChildInfo(ChildInfo::FileType, nameData, hashData));
-            } else if (typeData[0] == 'd') {
-                children.push_back(ChildInfo(ChildInfo::DirectoryType, nameData, hashData));
-            } else {
-                throw sg_io_exception("Malformed line code in dir index file", p);
+            if (typeData != "f" && typeData != "d" ) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "malformed .dirindex file: invalid type in line '" << line << "', expected 'd' or 'f', (ignoring line)" );
+                continue;
             }
+            children.push_back(ChildInfo(typeData == "f" ? ChildInfo::FileType : ChildInfo::DirectoryType, tokens[1], tokens[2]));
 
-            if (sizeData) {
-                children.back().setSize(sizeData);
+            if (tokens.size() > 3) {
+                children.back().setSize(tokens[3]);
             }
         }
 
@@ -511,7 +588,7 @@ private:
             ok = _repository->deleteDirectory(fpath.str());
         } else {
             // remove the hash cache entry
-            _repository->updatedFileContents(fpath, std::string());
+            _repository->updatedFileContents(p, std::string());
             ok = p.remove();
         }
 
@@ -534,7 +611,16 @@ private:
   HTTPRepoPrivate* _repository;
   SGPath _relativePath; // in URL and file-system space
 
-
+    typedef enum
+    {
+        NotUpdated,
+        UpdateInProgress,
+        Updated,
+        UpdateFailed,
+        DoNotUpdate
+    } State;
+
+    State _state;
 };
 
 HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
@@ -572,14 +658,38 @@ SGPath HTTPRepository::fsBase() const
 
 void HTTPRepository::update()
 {
-    if (_d->isUpdating) {
+    _d->rootDir->markSubtreeAsNeedingUpdate();
+    _d->updateWaiting();
+}
+
+void HTTPRepository::setEntireRepositoryMode() // switch to whole-repository updating and enable all known directories
+{
+    if (!_d->updateEverything) {
+        // this is a one-way decision
+        _d->updateEverything = true; // addSubpath() returns early with a warning once this flag is set
+    }
+
+    // probably overkill but not expensive so let's check everything
+    // we have in case someone did something funky and switched from partial
+    // to 'whole repo' updating.
+    _d->rootDir->markSubtreeAsEnabled();
+}
+
+
+void HTTPRepository::addSubpath(const std::string& relPath)
+{
+    if (_d->updateEverything) {
+        SG_LOG(SG_TERRASYNC, SG_WARN, "called HTTPRepository::addSubpath but updating everything");
         return;
     }
 
-    _d->status = REPO_NO_ERROR;
-    _d->isUpdating = true;
-    _d->failures.clear();
-    _d->updateDir(_d->rootDir, std::string(), 0);
+    _d->updatePaths.push_back(relPath);
+
+    HTTPDirectory* dir = _d->getOrCreateDirectory(relPath);
+    dir->markSubtreeAsEnabled();
+    dir->markAncestorChainAsEnabled();
+
+    _d->updateWaiting();
 }
 
 bool HTTPRepository::isDoingSync() const
@@ -619,7 +729,7 @@ size_t HTTPRepository::bytesDownloaded() const
     return result;
 }
 
-AbstractRepository::ResultCode
+HTTPRepository::ResultCode
 HTTPRepository::failure() const
 {
     if ((_d->status == REPO_NO_ERROR) && !_d->failures.empty()) {
@@ -629,12 +739,6 @@ HTTPRepository::failure() const
     return _d->status;
 }
 
-    void HTTPRepoGetRequest::cancel()
-    {
-        _directory->repository()->http->cancelRequest(this, "Reposiotry cancelled");
-        _directory = 0;
-    }
-
     class FileGetRequest : public HTTPRepoGetRequest
     {
     public:
@@ -644,14 +748,13 @@ HTTPRepository::failure() const
         {
             pathInRepo = _directory->absolutePath();
             pathInRepo.append(fileName);
-            //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url());
         }
 
     protected:
         virtual void gotBodyData(const char* s, int n)
         {
             if (!file.get()) {
-                file.reset(new SGFile(pathInRepo.str()));
+                file.reset(new SGBinaryFile(pathInRepo.str()));
                 if (!file->open(SG_IO_OUT)) {
                   SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo);
                   _directory->repository()->http->cancelRequest(this, "Unable to create output file");
@@ -667,14 +770,17 @@ HTTPRepository::failure() const
         virtual void onDone()
         {
             file->close();
+
             if (responseCode() == 200) {
                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
                 _directory->didUpdateFile(fileName, hash, contentSize());
-                //SG_LOG(SG_TERRASYNC, SG_INFO, "got file " << fileName << " in " << _directory->absolutePath());
+                SG_LOG(SG_TERRASYNC, SG_DEBUG, "got file " << fileName << " in " << _directory->absolutePath());
             } else if (responseCode() == 404) {
-                _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
+                SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file not found on server: " << fileName << " for " << _directory->absolutePath());
+                _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
             } else {
-                _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_HTTP);
+                SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file download error on server: " << fileName << " for " << _directory->absolutePath() << ": " << responseCode() );
+                _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_HTTP);
             }
 
             _directory->repository()->finishedRequest(this);
@@ -686,9 +792,9 @@ HTTPRepository::failure() const
             if (pathInRepo.exists()) {
                 pathInRepo.remove();
             }
-            
+
             if (_directory) {
-                _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_SOCKET);
+                _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_SOCKET);
                 _directory->repository()->finishedRequest(this);
             }
         }
@@ -701,7 +807,7 @@ HTTPRepository::failure() const
         std::string fileName; // if empty, we're getting the directory itself
         SGPath pathInRepo;
         simgear::sha1nfo hashContext;
-        std::auto_ptr<SGFile> file;
+        std::auto_ptr<SGBinaryFile> file;
     };
 
     class DirGetRequest : public HTTPRepoGetRequest
@@ -713,7 +819,6 @@ HTTPRepository::failure() const
             _targetHash(targetHash)
         {
             sha1_init(&hashContext);
-           //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url());
         }
 
         void setIsRootDir()
@@ -738,7 +843,7 @@ HTTPRepository::failure() const
             if (responseCode() == 200) {
                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
                 if (!_targetHash.empty() && (hash != _targetHash)) {
-                    _directory->failedToUpdate(AbstractRepository::REPO_ERROR_CHECKSUM);
+                    _directory->failedToUpdate(HTTPRepository::REPO_ERROR_CHECKSUM);
                     _directory->repository()->finishedRequest(this);
                     return;
                 }
@@ -762,8 +867,8 @@ HTTPRepository::failure() const
                     of.write(body.data(), body.size());
                     of.close();
                     _directory->dirIndexUpdated(hash);
-
-                    //SG_LOG(SG_TERRASYNC, SG_INFO, "updated dir index " << _directory->absolutePath());
+                } else {
+                    _directory->markAsUpToDate();
                 }
 
                 _directory->repository()->totalDownloaded += contentSize();
@@ -775,13 +880,13 @@ HTTPRepository::failure() const
                     st.stamp();
                     _directory->updateChildrenBasedOnHash();
                     SG_LOG(SG_TERRASYNC, SG_INFO, "after update of:" << _directory->absolutePath() << " child update took:" << st.elapsedMSec());
-                } catch (sg_exception& e) {
-                    _directory->failedToUpdate(AbstractRepository::REPO_ERROR_IO);
+                } catch (sg_exception& ) {
+                    _directory->failedToUpdate(HTTPRepository::REPO_ERROR_IO);
                 }
             } else if (responseCode() == 404) {
-                _directory->failedToUpdate(AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
+                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
             } else {
-                _directory->failedToUpdate(AbstractRepository::REPO_ERROR_HTTP);
+                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_HTTP);
             }
 
             _directory->repository()->finishedRequest(this);
@@ -790,7 +895,7 @@ HTTPRepository::failure() const
         virtual void onFail()
         {
             if (_directory) {
-                _directory->failedToUpdate(AbstractRepository::REPO_ERROR_SOCKET);
+                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_SOCKET);
                 _directory->repository()->finishedRequest(this);
             }
         }
@@ -815,15 +920,18 @@ HTTPRepository::failure() const
 
     HTTPRepoPrivate::~HTTPRepoPrivate()
     {
+        // iterate over a copy: cancelRequest will fail each request, which removes
+        // it from activeRequests and would invalidate any iterator into that vector.
+        RequestVector copyOfActive(activeRequests);
+        RequestVector::iterator rq;
+        for (rq = copyOfActive.begin(); rq != copyOfActive.end(); ++rq) {
+            http->cancelRequest(*rq, "Repository object deleted");
+        }
+
         DirectoryVector::iterator it;
         for (it=directories.begin(); it != directories.end(); ++it) {
             delete *it;
         }
-
-        RequestVector::iterator r;
-        for (r=activeRequests.begin(); r != activeRequests.end(); ++r) {
-            (*r)->cancel();
-        }
     }
 
     HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name, size_t sz)
@@ -836,6 +944,7 @@ HTTPRepository::failure() const
 
     HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash, size_t sz)
     {
+        dir->markAsUpdating();
         RepoRequestPtr r(new DirGetRequest(dir, hash));
         r->setContentSize(sz);
         makeRequest(r);
@@ -880,7 +989,7 @@ HTTPRepository::failure() const
         sha1_init(&info);
         char* buf = static_cast<char*>(malloc(1024 * 1024));
         size_t readLen;
-        SGFile f(p.str());
+        SGBinaryFile f(p.str());
         if (!f.open(SG_IO_IN)) {
             throw sg_io_exception("Couldn't open file for compute hash", p);
         }
@@ -951,25 +1060,34 @@ HTTPRepository::failure() const
         }
 
         std::ifstream stream(cachePath.c_str(), std::ios::in);
-        char buf[2048];
-        char* lastToken;
 
         while (!stream.eof()) {
-            stream.getline(buf, 2048);
-            lastToken = 0;
-            char* nameData = ::strtok_r(buf, ":", &lastToken);
-            char* timeData = ::strtok_r(NULL, ":", &lastToken);
-            char* sizeData = ::strtok_r(NULL, ":", &lastToken);
-            char* hashData = ::strtok_r(NULL, ":", &lastToken);
-            if (!nameData || !timeData || !sizeData || !hashData) {
+            std::string line;
+            std::getline(stream,line);
+            line = simgear::strutils::strip(line);
+            if( line.empty() || line[0] == '#' )
+                continue;
+
+            string_list tokens = simgear::strutils::split( line, ":" );
+            if( tokens.size() < 4 ) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
+                continue;
+            }
+            const std::string nameData = simgear::strutils::strip(tokens[0]);
+            const std::string timeData = simgear::strutils::strip(tokens[1]);
+            const std::string sizeData = simgear::strutils::strip(tokens[2]);
+            const std::string hashData = simgear::strutils::strip(tokens[3]);
+
+            if (nameData.empty() || timeData.empty() || sizeData.empty() || hashData.empty() ) {
+                SG_LOG(SG_TERRASYNC, SG_WARN, "invalid entry in '" << cachePath.str() << "': '" << line << "' (ignoring line)");
                 continue;
             }
 
             HashCacheEntry entry;
             entry.filePath = nameData;
             entry.hashHex = hashData;
-            entry.modTime = strtol(timeData, NULL, 10);
-            entry.lengthBytes = strtol(sizeData, NULL, 10);
+            entry.modTime = strtol(timeData.c_str(), NULL, 10);
+            entry.lengthBytes = strtol(sizeData.c_str(), NULL, 10);
             hashes.push_back(entry);
         }
     }
@@ -994,6 +1112,25 @@ HTTPRepository::failure() const
 
         HTTPDirectory* d = new HTTPDirectory(this, path);
         directories.push_back(d);
+        if (updateEverything) {
+            d->markAsEnabled();
+        } else {
+            string_list::const_iterator s;
+            bool shouldUpdate = false;
+
+            for (s = updatePaths.begin(); s != updatePaths.end(); ++s) {
+                size_t minLen = std::min(path.size(), s->size());
+                if (s->compare(0, minLen, path, 0, minLen) == 0) {
+                    shouldUpdate = true;
+                    break;
+                }
+            } // of paths iteration
+
+            if (shouldUpdate) {
+                d->markAsEnabled();
+            }
+        }
+
         return d;
     }
 
@@ -1009,7 +1146,7 @@ HTTPRepository::failure() const
             delete d;
 
             // update the hash cache too
-            updatedFileContents(path, std::string());
+            updatedFileContents(d->absolutePath(), std::string());
 
             return result;
         }
@@ -1030,10 +1167,11 @@ HTTPRepository::failure() const
     void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req)
     {
         RequestVector::iterator it = std::find(activeRequests.begin(), activeRequests.end(), req);
-        if (it == activeRequests.end()) {
-            throw sg_exception("lost request somehow", req->url());
+        // in some cases, for example a checksum failure, we clear the active
+        // and queued request vectors, so the ::find above can fail
+        if (it != activeRequests.end()) {
+            activeRequests.erase(it);
         }
-        activeRequests.erase(it);
 
         if (!queuedRequests.empty()) {
             RepoRequestPtr rr = queuedRequests.front();
@@ -1049,15 +1187,36 @@ HTTPRepository::failure() const
         }
     }
 
-    void HTTPRepoPrivate::failedToGetRootIndex(AbstractRepository::ResultCode st)
+    void HTTPRepoPrivate::failedToGetRootIndex(HTTPRepository::ResultCode st)
     {
         SG_LOG(SG_TERRASYNC, SG_WARN, "Failed to get root of repo:" << baseUrl);
         status = st;
     }
 
     void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath,
-                                              AbstractRepository::ResultCode fileStatus)
+                                              HTTPRepository::ResultCode fileStatus)
     {
+        if (fileStatus == HTTPRepository::REPO_ERROR_CHECKSUM) {
+            // stop updating, and mark repository as failed, because this
+            // usually indicates we need to start a fresh update from the
+            // root.
+            // (we could issue a retry here, but we leave that to higher layers)
+            status = fileStatus;
+
+            queuedRequests.clear();
+
+            RequestVector copyOfActive(activeRequests);
+            RequestVector::iterator rq;
+            for (rq = copyOfActive.begin(); rq != copyOfActive.end(); ++rq) {
+                //SG_LOG(SG_TERRASYNC, SG_DEBUG, "cancelling request for:" << (*rq)->url());
+                http->cancelRequest(*rq, "Repository updated failed");
+            }
+
+
+            SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update repository:" << baseUrl
+                   << ", possibly modified during sync");
+        }
+
         Failure f;
         f.path = relativePath;
         f.error = fileStatus;
@@ -1066,6 +1225,22 @@ HTTPRepository::failure() const
         SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " code:" << fileStatus);
     }
 
+    void HTTPRepoPrivate::updateWaiting() // begin (or continue) an update cycle for all directories waiting to be updated
+    {
+        if (!isUpdating) { // not in a cycle yet: clear the previous status and failure list
+            status = HTTPRepository::REPO_NO_ERROR;
+            isUpdating = true;
+            failures.clear();
+        }
+
+        // find to-be-updated sub-trees and kick them off
+        rootDir->updateIfWaiting(std::string(), 0); // empty hash: DirGetRequest skips its checksum comparison when the target hash is empty
 
+        // maybe there was nothing to do
+        if (activeRequests.empty()) { // NOTE(review): assumes updateDir() registers its request in activeRequests synchronously - confirm in makeRequest()
+            status = HTTPRepository::REPO_NO_ERROR;
+            isUpdating = false;
+        }
+    }
 
 } // of namespace simgear