From 4eb272f4f080bc447b364ed5f452ff9dc9bf74ef Mon Sep 17 00:00:00 2001 From: James Turner Date: Fri, 26 Feb 2016 21:18:26 +0200 Subject: [PATCH] Lots of work on HTTP repository failure handling. --- simgear/io/AbstractRepository.hxx | 3 +- simgear/io/HTTPRepository.cxx | 197 ++++++++++++++++++++---------- simgear/io/test_repository.cxx | 90 +++++++++++++- 3 files changed, 222 insertions(+), 68 deletions(-) diff --git a/simgear/io/AbstractRepository.hxx b/simgear/io/AbstractRepository.hxx index 33dc3725..d8fe26cf 100644 --- a/simgear/io/AbstractRepository.hxx +++ b/simgear/io/AbstractRepository.hxx @@ -56,7 +56,8 @@ public: REPO_ERROR_IO, REPO_ERROR_CHECKSUM, REPO_ERROR_FILE_NOT_FOUND, - REPO_ERROR_HTTP + REPO_ERROR_HTTP, + REPO_PARTIAL_UPDATE }; virtual ResultCode failure() const = 0; diff --git a/simgear/io/HTTPRepository.cxx b/simgear/io/HTTPRepository.cxx index f1a53ef3..477232ea 100644 --- a/simgear/io/HTTPRepository.cxx +++ b/simgear/io/HTTPRepository.cxx @@ -95,6 +95,26 @@ namespace simgear class HTTPDirectory; + class HTTPRepoGetRequest : public HTTP::Request + { + public: + HTTPRepoGetRequest(HTTPDirectory* d, const std::string& u) : + HTTP::Request(u), + _directory(d) + { + } + + virtual void cancel() + { + _directory = 0; + abort("Repository cancelled request"); + } + protected: + HTTPDirectory* _directory; + }; + + typedef SGSharedPtr RepoRequestPtr; + class HTTPRepoPrivate { public: @@ -110,12 +130,23 @@ public: typedef std::vector HashCache; HashCache hashes; + struct Failure + { + SGPath path; + AbstractRepository::ResultCode error; + }; + + typedef std::vector FailureList; + FailureList failures; + HTTPRepoPrivate(HTTPRepository* parent) : p(parent), isUpdating(false), status(AbstractRepository::REPO_NO_ERROR) { ; } + ~HTTPRepoPrivate(); + HTTPRepository* p; // link back to outer HTTP::Client* http; std::string baseUrl; @@ -125,7 +156,7 @@ public: HTTPDirectory* rootDir; HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name); - HTTP::Request_ptr updateDir(HTTPDirectory* dir); + HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash); std::string hashForPath(const SGPath& p); void updatedFileContents(const SGPath& p, const std::string& newHash); @@ -137,10 +168,10 @@ public: void failedToUpdateChild(const SGPath& relativePath, AbstractRepository::ResultCode fileStatus); - typedef std::vector RequestVector; + typedef std::vector RequestVector; RequestVector requests; - void finishedRequest(const HTTP::Request_ptr& req); + void finishedRequest(const RepoRequestPtr& req); HTTPDirectory* getOrCreateDirectory(const std::string& path); bool deleteDirectory(const std::string& path); @@ -198,6 +229,8 @@ public: _repository(repo), _relativePath(path) { + assert(repo); + SGPath p(absolutePath()); if (p.exists()) { try { @@ -242,14 +275,13 @@ public: // root dir failed _repository->failedToGetRootIndex(status); } else { - SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update dir:" << _relativePath); _repository->failedToUpdateChild(_relativePath, status); } } void updateChildrenBasedOnHash() { - SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath()); + //SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath()); string_list indexNames = indexChildren(), toBeUpdated, orphans; @@ -268,7 +300,8 @@ public: } else if (c->hash != hash) { // file exists, but hash mismatch, schedule update if (!hash.empty()) { - SG_LOG(SG_TERRASYNC, SG_INFO, "file exists but hash is wrong for:" << c->name); + //SG_LOG(SG_TERRASYNC, SG_INFO, "file exists but hash is wrong for:" << c->name); + //SG_LOG(SG_TERRASYNC, SG_INFO, "on disk:" << hash << " vs in info:" << c->hash); } toBeUpdated.push_back(c->name); @@ -333,7 +366,7 @@ public: SGPath p(relativePath()); p.append(*it); HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str()); - _repository->updateDir(childDir); + _repository->updateDir(childDir, cit->hash); } } } @@ -352,10 +385,21 @@ public: void didUpdateFile(const std::string& file, const std::string& hash) { - SGPath fpath(_relativePath); - fpath.append(file); - _repository->updatedFileContents(fpath, hash); - SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath); + // check hash matches what we expected + ChildInfoList::iterator it = findIndexChild(file); + if (it == children.end()) { + SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file); + } else { + SGPath fpath(_relativePath); + fpath.append(file); + + if (it->hash != hash) { + _repository->failedToUpdateChild(_relativePath, AbstractRepository::REPO_ERROR_CHECKSUM); + } else { + _repository->updatedFileContents(fpath, hash); + //SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath); + } // of hash matches + } // of found in child list } void didFailToUpdateFile(const std::string& file, @@ -363,7 +407,6 @@ public: { SGPath fpath(_relativePath); fpath.append(file); - SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update:" << fpath); _repository->failedToUpdateChild(fpath, status); } private: @@ -517,7 +560,8 @@ void HTTPRepository::update() _d->status = REPO_NO_ERROR; _d->isUpdating = true; - _d->updateDir(_d->rootDir); + _d->failures.clear(); + _d->updateDir(_d->rootDir, std::string()); } bool HTTPRepository::isDoingSync() const @@ -532,34 +576,39 @@ bool HTTPRepository::isDoingSync() const AbstractRepository::ResultCode HTTPRepository::failure() const { + if ((_d->status == REPO_NO_ERROR) && !_d->failures.empty()) { + return REPO_PARTIAL_UPDATE; + } + return _d->status; } - class FileGetRequest : public HTTP::Request + class FileGetRequest : public HTTPRepoGetRequest { public: FileGetRequest(HTTPDirectory* d, const std::string& file) : - HTTP::Request(makeUrl(d, file)), - directory(d), + HTTPRepoGetRequest(d, makeUrl(d, file)), fileName(file) { - SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url()); + pathInRepo = _directory->absolutePath(); + pathInRepo.append(fileName); + //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url()); } protected: virtual void gotBodyData(const char* s, int n) { if (!file.get()) { - SGPath p(pathInRepo()); - file.reset(new SGFile(p.str())); + file.reset(new SGFile(pathInRepo.str())); if (!file->open(SG_IO_OUT)) { - SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << p); + SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo); abort("Unable to create output file"); } sha1_init(&hashContext); } + sha1_write(&hashContext, s, n); file->write(s, n); } @@ -567,24 +616,26 @@ HTTPRepository::failure() const { file->close(); if (responseCode() == 200) { - std::string hash = strutils::encodeHex((char*) sha1_result(&hashContext)); - directory->didUpdateFile(fileName, hash); - - SG_LOG(SG_TERRASYNC, SG_DEBUG, "got file " << fileName << " in " << directory->absolutePath()); + std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH); + _directory->didUpdateFile(fileName, hash); + //SG_LOG(SG_TERRASYNC, SG_INFO, "got file " << fileName << " in " << _directory->absolutePath()); } else if (responseCode() == 404) { - directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_FILE_NOT_FOUND); + _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_FILE_NOT_FOUND); } else { - directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_HTTP); + _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_HTTP); } - directory->repository()->finishedRequest(this); + _directory->repository()->finishedRequest(this); } virtual void onFail() { file.reset(); - directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_SOCKET); - directory->repository()->finishedRequest(this); + pathInRepo.remove(); + if (_directory) { + _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_SOCKET); + _directory->repository()->finishedRequest(this); + } } private: static std::string makeUrl(HTTPDirectory* d, const std::string& file) @@ -592,30 +643,22 @@ HTTPRepository::failure() const return d->url() + "/" + file; } - SGPath pathInRepo() const - { - SGPath p(directory->absolutePath()); - p.append(fileName); - return p; - } - - HTTPDirectory* directory; std::string fileName; // if empty, we're getting the directory itself + SGPath pathInRepo; simgear::sha1nfo hashContext; std::auto_ptr file; }; - class DirGetRequest : public HTTP::Request + class DirGetRequest : public HTTPRepoGetRequest { public: - DirGetRequest(HTTPDirectory* d) : - HTTP::Request(makeUrl(d)), - directory(d), - _isRootDir(false) + DirGetRequest(HTTPDirectory* d, const std::string& targetHash) : + HTTPRepoGetRequest(d, makeUrl(d)), + _isRootDir(false), + _targetHash(targetHash) { sha1_init(&hashContext); - SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url()); - + //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url()); } void setIsRootDir() @@ -638,11 +681,16 @@ HTTPRepository::failure() const virtual void onDone() { if (responseCode() == 200) { - std::string hash = strutils::encodeHex((char*) sha1_result(&hashContext)); - std::string curHash = directory->repository()->hashForPath(path()); - if (hash != curHash) { + std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH); + if (!_targetHash.empty() && (hash != _targetHash)) { + _directory->failedToUpdate(AbstractRepository::REPO_ERROR_CHECKSUM); + _directory->repository()->finishedRequest(this); + return; + } - simgear::Dir d(directory->absolutePath()); + std::string curHash = _directory->repository()->hashForPath(path()); + if (hash != curHash) { + simgear::Dir d(_directory->absolutePath()); if (!d.exists()) { if (!d.create(0700)) { throw sg_io_exception("Unable to create directory", d.path()); @@ -658,32 +706,33 @@ HTTPRepository::failure() const of.write(body.data(), body.size()); of.close(); - directory->dirIndexUpdated(hash); - - SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated dir index " << directory->absolutePath()); + _directory->dirIndexUpdated(hash); + //SG_LOG(SG_TERRASYNC, SG_INFO, "updated dir index " << _directory->absolutePath()); } try { // either way we've confirmed the index is valid so update // children now - directory->updateChildrenBasedOnHash(); + _directory->updateChildrenBasedOnHash(); } catch (sg_exception& e) { - directory->failedToUpdate(AbstractRepository::REPO_ERROR_IO); + _directory->failedToUpdate(AbstractRepository::REPO_ERROR_IO); } } else if (responseCode() == 404) { - directory->failedToUpdate(AbstractRepository::REPO_ERROR_FILE_NOT_FOUND); + _directory->failedToUpdate(AbstractRepository::REPO_ERROR_FILE_NOT_FOUND); } else { - directory->failedToUpdate(AbstractRepository::REPO_ERROR_HTTP); + _directory->failedToUpdate(AbstractRepository::REPO_ERROR_HTTP); } - directory->repository()->finishedRequest(this); + _directory->repository()->finishedRequest(this); } virtual void onFail() { - directory->failedToUpdate(AbstractRepository::REPO_ERROR_SOCKET); - directory->repository()->finishedRequest(this); + if (_directory) { + _directory->failedToUpdate(AbstractRepository::REPO_ERROR_SOCKET); + _directory->repository()->finishedRequest(this); + } } private: static std::string makeUrl(HTTPDirectory* d) @@ -693,29 +742,41 @@ HTTPRepository::failure() const SGPath pathInRepo() const { - SGPath p(directory->absolutePath()); + SGPath p(_directory->absolutePath()); p.append(".dirindex"); return p; } - HTTPDirectory* directory; simgear::sha1nfo hashContext; std::string body; bool _isRootDir; ///< is this the repository root? + std::string _targetHash; }; + HTTPRepoPrivate::~HTTPRepoPrivate() + { + DirectoryVector::iterator it; + for (it=directories.begin(); it != directories.end(); ++it) { + delete *it; + } + + RequestVector::iterator r; + for (r=requests.begin(); r != requests.end(); ++r) { + (*r)->cancel(); + } + } HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name) { - HTTP::Request_ptr r(new FileGetRequest(dir, name)); + RepoRequestPtr r(new FileGetRequest(dir, name)); requests.push_back(r); http->makeRequest(r); return r; } - HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir) + HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash) { - HTTP::Request_ptr r(new DirGetRequest(dir)); + RepoRequestPtr r(new DirGetRequest(dir, hash)); requests.push_back(r); http->makeRequest(r); return r; @@ -890,7 +951,7 @@ HTTPRepository::failure() const return false; } - void HTTPRepoPrivate::finishedRequest(const HTTP::Request_ptr& req) + void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req) { RequestVector::iterator it = std::find(requests.begin(), requests.end(), req); if (it == requests.end()) { @@ -911,8 +972,12 @@ HTTPRepository::failure() const void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath, AbstractRepository::ResultCode fileStatus) { - // this means we only record the last error, should this be improved? - status = fileStatus; + Failure f; + f.path = relativePath; + f.error = fileStatus; + failures.push_back(f); + + SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " code:" << fileStatus); } diff --git a/simgear/io/test_repository.cxx b/simgear/io/test_repository.cxx index 743d1a72..371dee91 100644 --- a/simgear/io/test_repository.cxx +++ b/simgear/io/test_repository.cxx @@ -34,6 +34,14 @@ std::string dataForFile(const std::string& parentName, const std::string& name, return os.str(); } +std::string hashForData(const std::string& d) +{ + simgear::sha1nfo info; + sha1_init(&info); + sha1_write(&info, d.data(), d.size()); + return strutils::encodeHex(sha1_result(&info), HASH_LENGTH); +} + class TestRepoEntry { public: @@ -57,9 +65,21 @@ public: bool isDir; int revision; // for files int requestCount; + bool getWillFail; + bool returnCorruptData; void clearRequestCounts(); + void setGetWillFail(bool b) + { + getWillFail = b; + } + + void setReturnCorruptData(bool d) + { + returnCorruptData = d; + } + std::string pathInRepo() const { return parent ? (parent->pathInRepo() + "/" + name) : name; @@ -139,6 +159,8 @@ TestRepoEntry::TestRepoEntry(TestRepoEntry* pr, const std::string& nm, bool d) : { revision = 2; requestCount = 0; + getWillFail = false; + returnCorruptData = false; } TestRepoEntry::~TestRepoEntry() @@ -226,8 +248,22 @@ public: return; } + if (entry->getWillFail) { + sendErrorResponse(404, false, "entry marked to fail explicitly:" + repoPath); + return; + } + entry->requestCount++; - std::string content(entry->data()); + + std::string content; + if (entry->returnCorruptData) { + content = dataForFile("!$£$!" + entry->parent->name, + "corrupt_" + entry->name, + entry->revision); + } else { + content = entry->data(); + } + std::stringstream d; d << "HTTP/1.1 " << 200 << " " << reasonForCode(200) << "\r\n"; d << "Content-Length:" << content.size() << "\r\n"; @@ -475,6 +511,54 @@ void testLossOfLocalFiles(HTTP::Client* cl) std::cout << "Passed test: lose and replace local files" << std::endl; } +void testAbandonMissingFiles(HTTP::Client* cl) +{ + std::auto_ptr repo; + SGPath p(simgear::Dir::current().path()); + p.append("http_repo_missing_files"); + simgear::Dir pd(p); + if (pd.exists()) { + pd.removeChildren(); + } + + global_repo->defineFile("dirA/subdirE/fileAEA"); + global_repo->findEntry("dirA/subdirE/fileAEA")->setGetWillFail(true); + + repo.reset(new HTTPRepository(p, cl)); + repo->setBaseUrl("http://localhost:2000/repo"); + repo->update(); + waitForUpdateComplete(cl, repo.get()); + if (repo->failure() != AbstractRepository::REPO_PARTIAL_UPDATE) { + throw sg_exception("Bad result from missing files test"); + } + + global_repo->findEntry("dirA/subdirE/fileAEA")->setGetWillFail(false); +} + +void testAbandonCorruptFiles(HTTP::Client* cl) +{ + std::auto_ptr repo; + SGPath p(simgear::Dir::current().path()); + p.append("http_repo_corrupt_files"); + simgear::Dir pd(p); + if (pd.exists()) { + pd.removeChildren(); + } + + global_repo->defineFile("dirB/subdirG/fileBGA"); + global_repo->findEntry("dirB/subdirG/fileBGA")->setReturnCorruptData(true); + + repo.reset(new HTTPRepository(p, cl)); + repo->setBaseUrl("http://localhost:2000/repo"); + repo->update(); + waitForUpdateComplete(cl, repo.get()); + if (repo->failure() != AbstractRepository::REPO_PARTIAL_UPDATE) { + throw sg_exception("Bad result from corrupt files test"); + } + + std::cout << "Passed test: detect corrupted download" << std::endl; +} + int main(int argc, char* argv[]) { sglog().setLogLevels( SG_ALL, SG_INFO ); @@ -503,5 +587,9 @@ int main(int argc, char* argv[]) testMergeExistingFileWithoutDownload(&cl); + testAbandonMissingFiles(&cl); + + testAbandonCorruptFiles(&cl); + return 0; } -- 2.39.5