]> git.mxchange.org Git - simgear.git/blobdiff - simgear/io/HTTPRepository.cxx
HTTP: Always use absolute paths for hashes
[simgear.git] / simgear / io / HTTPRepository.cxx
index e5d8246984cd4275c61b8f15728994bf71089bd4..05635dac4c62ac15c5710d49aebd53c408fa4f4c 100644 (file)
@@ -57,8 +57,6 @@ namespace simgear
         {
         }
 
-        virtual void cancel();
-
         size_t contentSize() const
         {
             return _contentSize;
@@ -94,7 +92,7 @@ public:
     struct Failure
     {
         SGPath path;
-        AbstractRepository::ResultCode error;
+        HTTPRepository::ResultCode error;
     };
 
     typedef std::vector<Failure> FailureList;
@@ -104,7 +102,8 @@ public:
         hashCacheDirty(false),
         p(parent),
         isUpdating(false),
-        status(AbstractRepository::REPO_NO_ERROR),
+        updateEverything(false),
+        status(HTTPRepository::REPO_NO_ERROR),
         totalDownloaded(0)
     { ; }
 
@@ -115,10 +114,14 @@ public:
     std::string baseUrl;
     SGPath basePath;
     bool isUpdating;
-    AbstractRepository::ResultCode status;
+    bool updateEverything;
+    string_list updatePaths;
+    HTTPRepository::ResultCode status;
     HTTPDirectory* rootDir;
     size_t totalDownloaded;
 
+    void updateWaiting();
+
     HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name,
                                  size_t sz);
     HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash,
@@ -130,9 +133,9 @@ public:
     std::string computeHashForPath(const SGPath& p);
     void writeHashCache();
 
-    void failedToGetRootIndex(AbstractRepository::ResultCode st);
+    void failedToGetRootIndex(HTTPRepository::ResultCode st);
     void failedToUpdateChild(const SGPath& relativePath,
-                             AbstractRepository::ResultCode fileStatus);
+                             HTTPRepository::ResultCode fileStatus);
 
     typedef std::vector<RepoRequestPtr> RequestVector;
     RequestVector queuedRequests,
@@ -192,10 +195,12 @@ class HTTPDirectory
     typedef std::vector<ChildInfo> ChildInfoList;
     ChildInfoList children;
 
+
 public:
     HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
         _repository(repo),
-        _relativePath(path)
+        _relativePath(path),
+        _state(DoNotUpdate)
   {
       assert(repo);
 
@@ -228,17 +233,20 @@ public:
 
     void dirIndexUpdated(const std::string& hash)
     {
-        SGPath fpath(_relativePath);
+        SGPath fpath(absolutePath());
         fpath.append(".dirindex");
         _repository->updatedFileContents(fpath, hash);
 
+        _state = Updated;
+
         children.clear();
         parseDirIndex(children);
         std::sort(children.begin(), children.end());
     }
 
-    void failedToUpdate(AbstractRepository::ResultCode status)
+    void failedToUpdate(HTTPRepository::ResultCode status)
     {
+        _state = UpdateFailed;
         if (_relativePath.isNull()) {
             // root dir failed
             _repository->failedToGetRootIndex(status);
@@ -249,7 +257,11 @@ public:
 
     void updateChildrenBasedOnHash()
     {
-        //SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
+        // if we got here for a dir which is still updating or excluded
+        // from updates, just bail out right now.
+        if (_state != Updated) {
+            return;
+        }
 
         string_list indexNames = indexChildren(),
             toBeUpdated, orphans;
@@ -284,6 +296,9 @@ public:
                     SGPath p(relativePath());
                     p.append(it->file());
                     HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                    if (childDir->_state == NotUpdated) {
+                        childDir->_state = Updated;
+                    }
                     childDir->updateChildrenBasedOnHash();
                 }
             }
@@ -301,6 +316,101 @@ public:
         scheduleUpdates(toBeUpdated);
     }
 
+    void markAsUpToDate()
+    {
+        _state = Updated;
+    }
+
+    void markAsUpdating()
+    {
+        assert(_state == NotUpdated);
+        _state = HTTPDirectory::UpdateInProgress;
+    }
+
+    void markAsEnabled()
+    {
+        // assert because this should only get invoked on newly created
+        // directory objects which are inside the sub-tree(s) to be updated
+        assert(_state == DoNotUpdate);
+        _state = NotUpdated;
+    }
+
+    void markSubtreeAsNeedingUpdate()
+    {
+        if (_state == Updated) {
+            _state = NotUpdated; // reset back to not-updated
+        }
+
+        ChildInfoList::iterator cit;
+        for (cit = children.begin(); cit != children.end(); ++cit) {
+            if (cit->type == ChildInfo::DirectoryType) {
+                SGPath p(relativePath());
+                p.append(cit->name);
+                HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                childDir->markSubtreeAsNeedingUpdate();
+            }
+        } // of child iteration
+    }
+
+    void markSubtreeAsEnabled()
+    {
+        if (_state == DoNotUpdate) {
+            markAsEnabled();
+        }
+
+        ChildInfoList::iterator cit;
+        for (cit = children.begin(); cit != children.end(); ++cit) {
+            if (cit->type == ChildInfo::DirectoryType) {
+                SGPath p(relativePath());
+                p.append(cit->name);
+                HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                childDir->markSubtreeAsEnabled();
+            }
+        } // of child iteration
+    }
+
+
+    void markAncestorChainAsEnabled()
+    {
+        if (_state == DoNotUpdate) {
+            markAsEnabled();
+        }
+
+        if (_relativePath.isNull()) {
+            return;
+        }
+
+        std::string prPath = _relativePath.dir();
+        if (prPath.empty()) {
+            _repository->rootDir->markAncestorChainAsEnabled();
+        } else {
+            HTTPDirectory* prDir = _repository->getOrCreateDirectory(prPath);
+            prDir->markAncestorChainAsEnabled();
+        }
+    }
+
+    void updateIfWaiting(const std::string& hash, size_t sz)
+    {
+        if (_state == NotUpdated) {
+            _repository->updateDir(this, hash, sz);
+            return;
+        }
+
+        if ((_state == DoNotUpdate) || (_state == UpdateInProgress)) {
+            return;
+        }
+
+        ChildInfoList::iterator cit;
+        for (cit = children.begin(); cit != children.end(); ++cit) {
+            if (cit->type == ChildInfo::DirectoryType) {
+                SGPath p(relativePath());
+                p.append(cit->name);
+                HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                childDir->updateIfWaiting(cit->hash, cit->sizeInBytes);
+            }
+        } // of child iteration
+    }
+
     void removeOrphans(const string_list& orphans)
     {
         string_list::const_iterator it;
@@ -337,6 +447,11 @@ public:
                 SGPath p(relativePath());
                 p.append(*it);
                 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
+                if (childDir->_state == DoNotUpdate) {
+                    SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, child:" << *it << " is marked do not update so skipping");
+                    continue;
+                }
+
                 _repository->updateDir(childDir, cit->hash, cit->sizeInBytes);
             }
         }
@@ -361,21 +476,22 @@ public:
         if (it == children.end()) {
             SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
         } else {
-            SGPath fpath(_relativePath);
+            SGPath fpath(absolutePath());
             fpath.append(file);
 
             if (it->hash != hash) {
-                _repository->failedToUpdateChild(_relativePath, AbstractRepository::REPO_ERROR_CHECKSUM);
+                // we don't erase the file on a hash mismatch, becuase if we're syncing during the
+                // middle of a server-side update, the downloaded file may actually become valid.
+                _repository->failedToUpdateChild(_relativePath, HTTPRepository::REPO_ERROR_CHECKSUM);
             } else {
                 _repository->updatedFileContents(fpath, hash);
                 _repository->totalDownloaded += sz;
-                //SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath);
             } // of hash matches
         } // of found in child list
     }
 
     void didFailToUpdateFile(const std::string& file,
-                             AbstractRepository::ResultCode status)
+                             HTTPRepository::ResultCode status)
     {
         SGPath fpath(_relativePath);
         fpath.append(file);
@@ -472,7 +588,7 @@ private:
             ok = _repository->deleteDirectory(fpath.str());
         } else {
             // remove the hash cache entry
-            _repository->updatedFileContents(fpath, std::string());
+            _repository->updatedFileContents(p, std::string());
             ok = p.remove();
         }
 
@@ -495,7 +611,16 @@ private:
   HTTPRepoPrivate* _repository;
   SGPath _relativePath; // in URL and file-system space
 
-
+    typedef enum
+    {
+        NotUpdated,
+        UpdateInProgress,
+        Updated,
+        UpdateFailed,
+        DoNotUpdate
+    } State;
+
+    State _state;
 };
 
 HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
@@ -533,14 +658,38 @@ SGPath HTTPRepository::fsBase() const
 
 void HTTPRepository::update()
 {
-    if (_d->isUpdating) {
+    _d->rootDir->markSubtreeAsNeedingUpdate();
+    _d->updateWaiting();
+}
+
+void HTTPRepository::setEntireRepositoryMode()
+{
+    if (!_d->updateEverything) {
+        // this is a one-way decision
+        _d->updateEverything = true;
+    }
+
+    // probably overkill but not expensive so let's check everything
+    // we have in case someone did something funky and switched from partial
+    // to 'whole repo' updating.
+    _d->rootDir->markSubtreeAsEnabled();
+}
+
+
+void HTTPRepository::addSubpath(const std::string& relPath)
+{
+    if (_d->updateEverything) {
+        SG_LOG(SG_TERRASYNC, SG_WARN, "called HTTPRepository::addSubpath but updating everything");
         return;
     }
 
-    _d->status = REPO_NO_ERROR;
-    _d->isUpdating = true;
-    _d->failures.clear();
-    _d->updateDir(_d->rootDir, std::string(), 0);
+    _d->updatePaths.push_back(relPath);
+
+    HTTPDirectory* dir = _d->getOrCreateDirectory(relPath);
+    dir->markSubtreeAsEnabled();
+    dir->markAncestorChainAsEnabled();
+
+    _d->updateWaiting();
 }
 
 bool HTTPRepository::isDoingSync() const
@@ -580,7 +729,7 @@ size_t HTTPRepository::bytesDownloaded() const
     return result;
 }
 
-AbstractRepository::ResultCode
+HTTPRepository::ResultCode
 HTTPRepository::failure() const
 {
     if ((_d->status == REPO_NO_ERROR) && !_d->failures.empty()) {
@@ -590,12 +739,6 @@ HTTPRepository::failure() const
     return _d->status;
 }
 
-    void HTTPRepoGetRequest::cancel()
-    {
-        _directory->repository()->http->cancelRequest(this, "Reposiotry cancelled");
-        _directory = 0;
-    }
-
     class FileGetRequest : public HTTPRepoGetRequest
     {
     public:
@@ -605,14 +748,13 @@ HTTPRepository::failure() const
         {
             pathInRepo = _directory->absolutePath();
             pathInRepo.append(fileName);
-            //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url());
         }
 
     protected:
         virtual void gotBodyData(const char* s, int n)
         {
             if (!file.get()) {
-                file.reset(new SGFile(pathInRepo.str()));
+                file.reset(new SGBinaryFile(pathInRepo.str()));
                 if (!file->open(SG_IO_OUT)) {
                   SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo);
                   _directory->repository()->http->cancelRequest(this, "Unable to create output file");
@@ -628,16 +770,17 @@ HTTPRepository::failure() const
         virtual void onDone()
         {
             file->close();
+
             if (responseCode() == 200) {
                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
                 _directory->didUpdateFile(fileName, hash, contentSize());
                 SG_LOG(SG_TERRASYNC, SG_DEBUG, "got file " << fileName << " in " << _directory->absolutePath());
             } else if (responseCode() == 404) {
                 SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file not found on server: " << fileName << " for " << _directory->absolutePath());
-                _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
+                _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
             } else {
                 SG_LOG(SG_TERRASYNC, SG_WARN, "terrasync file download error on server: " << fileName << " for " << _directory->absolutePath() << ": " << responseCode() );
-                _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_HTTP);
+                _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_HTTP);
             }
 
             _directory->repository()->finishedRequest(this);
@@ -649,9 +792,9 @@ HTTPRepository::failure() const
             if (pathInRepo.exists()) {
                 pathInRepo.remove();
             }
-            
+
             if (_directory) {
-                _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_SOCKET);
+                _directory->didFailToUpdateFile(fileName, HTTPRepository::REPO_ERROR_SOCKET);
                 _directory->repository()->finishedRequest(this);
             }
         }
@@ -664,7 +807,7 @@ HTTPRepository::failure() const
         std::string fileName; // if empty, we're getting the directory itself
         SGPath pathInRepo;
         simgear::sha1nfo hashContext;
-        std::auto_ptr<SGFile> file;
+        std::auto_ptr<SGBinaryFile> file;
     };
 
     class DirGetRequest : public HTTPRepoGetRequest
@@ -676,7 +819,6 @@ HTTPRepository::failure() const
             _targetHash(targetHash)
         {
             sha1_init(&hashContext);
-           //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url());
         }
 
         void setIsRootDir()
@@ -701,7 +843,7 @@ HTTPRepository::failure() const
             if (responseCode() == 200) {
                 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
                 if (!_targetHash.empty() && (hash != _targetHash)) {
-                    _directory->failedToUpdate(AbstractRepository::REPO_ERROR_CHECKSUM);
+                    _directory->failedToUpdate(HTTPRepository::REPO_ERROR_CHECKSUM);
                     _directory->repository()->finishedRequest(this);
                     return;
                 }
@@ -725,8 +867,8 @@ HTTPRepository::failure() const
                     of.write(body.data(), body.size());
                     of.close();
                     _directory->dirIndexUpdated(hash);
-
-                    //SG_LOG(SG_TERRASYNC, SG_INFO, "updated dir index " << _directory->absolutePath());
+                } else {
+                    _directory->markAsUpToDate();
                 }
 
                 _directory->repository()->totalDownloaded += contentSize();
@@ -739,12 +881,12 @@ HTTPRepository::failure() const
                     _directory->updateChildrenBasedOnHash();
                     SG_LOG(SG_TERRASYNC, SG_INFO, "after update of:" << _directory->absolutePath() << " child update took:" << st.elapsedMSec());
                 } catch (sg_exception& ) {
-                    _directory->failedToUpdate(AbstractRepository::REPO_ERROR_IO);
+                    _directory->failedToUpdate(HTTPRepository::REPO_ERROR_IO);
                 }
             } else if (responseCode() == 404) {
-                _directory->failedToUpdate(AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
+                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_FILE_NOT_FOUND);
             } else {
-                _directory->failedToUpdate(AbstractRepository::REPO_ERROR_HTTP);
+                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_HTTP);
             }
 
             _directory->repository()->finishedRequest(this);
@@ -753,7 +895,7 @@ HTTPRepository::failure() const
         virtual void onFail()
         {
             if (_directory) {
-                _directory->failedToUpdate(AbstractRepository::REPO_ERROR_SOCKET);
+                _directory->failedToUpdate(HTTPRepository::REPO_ERROR_SOCKET);
                 _directory->repository()->finishedRequest(this);
             }
         }
@@ -778,15 +920,18 @@ HTTPRepository::failure() const
 
     HTTPRepoPrivate::~HTTPRepoPrivate()
     {
+        // take a copy since cancelRequest will fail and hence remove
+        // remove activeRequests, invalidating any iterator to it.
+        RequestVector copyOfActive(activeRequests);
+        RequestVector::iterator rq;
+        for (rq = copyOfActive.begin(); rq != copyOfActive.end(); ++rq) {
+            http->cancelRequest(*rq, "Repository object deleted");
+        }
+
         DirectoryVector::iterator it;
         for (it=directories.begin(); it != directories.end(); ++it) {
             delete *it;
         }
-
-        RequestVector::iterator r;
-        for (r=activeRequests.begin(); r != activeRequests.end(); ++r) {
-            (*r)->cancel();
-        }
     }
 
     HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name, size_t sz)
@@ -799,6 +944,7 @@ HTTPRepository::failure() const
 
     HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash, size_t sz)
     {
+        dir->markAsUpdating();
         RepoRequestPtr r(new DirGetRequest(dir, hash));
         r->setContentSize(sz);
         makeRequest(r);
@@ -843,7 +989,7 @@ HTTPRepository::failure() const
         sha1_init(&info);
         char* buf = static_cast<char*>(malloc(1024 * 1024));
         size_t readLen;
-        SGFile f(p.str());
+        SGBinaryFile f(p.str());
         if (!f.open(SG_IO_IN)) {
             throw sg_io_exception("Couldn't open file for compute hash", p);
         }
@@ -966,6 +1112,25 @@ HTTPRepository::failure() const
 
         HTTPDirectory* d = new HTTPDirectory(this, path);
         directories.push_back(d);
+        if (updateEverything) {
+            d->markAsEnabled();
+        } else {
+            string_list::const_iterator s;
+            bool shouldUpdate = false;
+
+            for (s = updatePaths.begin(); s != updatePaths.end(); ++s) {
+                size_t minLen = std::min(path.size(), s->size());
+                if (s->compare(0, minLen, path, 0, minLen) == 0) {
+                    shouldUpdate = true;
+                    break;
+                }
+            } // of paths iteration
+
+            if (shouldUpdate) {
+                d->markAsEnabled();
+            }
+        }
+
         return d;
     }
 
@@ -981,7 +1146,7 @@ HTTPRepository::failure() const
             delete d;
 
             // update the hash cache too
-            updatedFileContents(path, std::string());
+            updatedFileContents(d->absolutePath(), std::string());
 
             return result;
         }
@@ -1002,10 +1167,11 @@ HTTPRepository::failure() const
     void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req)
     {
         RequestVector::iterator it = std::find(activeRequests.begin(), activeRequests.end(), req);
-        if (it == activeRequests.end()) {
-            throw sg_exception("lost request somehow", req->url());
+        // in some cases, for example a checksum failure, we clear the active
+        // and queued request vectors, so the ::find above can fail
+        if (it != activeRequests.end()) {
+            activeRequests.erase(it);
         }
-        activeRequests.erase(it);
 
         if (!queuedRequests.empty()) {
             RepoRequestPtr rr = queuedRequests.front();
@@ -1021,15 +1187,36 @@ HTTPRepository::failure() const
         }
     }
 
-    void HTTPRepoPrivate::failedToGetRootIndex(AbstractRepository::ResultCode st)
+    void HTTPRepoPrivate::failedToGetRootIndex(HTTPRepository::ResultCode st)
     {
         SG_LOG(SG_TERRASYNC, SG_WARN, "Failed to get root of repo:" << baseUrl);
         status = st;
     }
 
     void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath,
-                                              AbstractRepository::ResultCode fileStatus)
+                                              HTTPRepository::ResultCode fileStatus)
     {
+        if (fileStatus == HTTPRepository::REPO_ERROR_CHECKSUM) {
+            // stop updating, and mark repository as failed, becuase this
+            // usually indicates we need to start a fresh update from the
+            // root.
+            // (we could issue a retry here, but we leave that to higher layers)
+            status = fileStatus;
+
+            queuedRequests.clear();
+
+            RequestVector copyOfActive(activeRequests);
+            RequestVector::iterator rq;
+            for (rq = copyOfActive.begin(); rq != copyOfActive.end(); ++rq) {
+                //SG_LOG(SG_TERRASYNC, SG_DEBUG, "cancelling request for:" << (*rq)->url());
+                http->cancelRequest(*rq, "Repository updated failed");
+            }
+
+
+            SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update repository:" << baseUrl
+                   << ", possibly modified during sync");
+        }
+
         Failure f;
         f.path = relativePath;
         f.error = fileStatus;
@@ -1038,6 +1225,22 @@ HTTPRepository::failure() const
         SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " code:" << fileStatus);
     }
 
+    void HTTPRepoPrivate::updateWaiting()
+    {
+        if (!isUpdating) {
+            status = HTTPRepository::REPO_NO_ERROR;
+            isUpdating = true;
+            failures.clear();
+        }
+
+        // find to-be-updated sub-trees and kick them off
+        rootDir->updateIfWaiting(std::string(), 0);
 
+        // maybe there was nothing to do
+        if (activeRequests.empty()) {
+            status = HTTPRepository::REPO_NO_ERROR;
+            isUpdating = false;
+        }
+    }
 
 } // of namespace simgear