1 // HTTPRepository.cxx -- plain HTTP TerraSync remote client
3 // Copyright (C) 2016 James Turner <zakalawe@mac.com>
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 2 of the
8 // License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 #include "HTTPRepository.hxx"
21 #include <simgear_config.h>
36 #include "simgear/debug/logstream.hxx"
37 #include "simgear/misc/strutils.hxx"
38 #include <simgear/misc/sg_dir.hxx>
39 #include <simgear/io/HTTPClient.hxx>
40 #include <simgear/io/sg_file.hxx>
41 #include <simgear/misc/sgstream.hxx>
42 #include <simgear/structure/exception.hxx>
44 #include <simgear/misc/sg_hash.hxx>
46 #if defined(SG_WINDOWS)
49 * public domain strtok_r() by Charlie Gordon
51 * from comp.lang.c 9/14/2007
53 * http://groups.google.com/group/comp.lang.c/msg/2ab1ecbb86646684
55 * (Declaration that it's public domain):
56 * http://groups.google.com/group/comp.lang.c/msg/7c7b39328fefab9c
71 str += strspn(str, delim);
80 str += strcspn(str, delim);
// GET request issued on behalf of one HTTPDirectory; derives from HTTP::Request
// and is the base of the file and directory-index requests in this file.
98 class HTTPRepoGetRequest : public HTTP::Request
// `d` is the directory the request belongs to; `u` is presumably the URL to
// fetch (the initializer list is not shown here — confirm).
101 HTTPRepoGetRequest(HTTPDirectory* d, const std::string& u) :
// Cancels the request by aborting it with a fixed reason string.
107 virtual void cancel()
110 abort("Repository cancelled request");
// Accessor / mutator for the size associated with this request; callers in
// this file pass the byte size taken from the directory index.
113 size_t contentSize() const
118 void setContentSize(size_t sz)
// Directory this request reports back to.
// NOTE(review): raw pointer — ownership and lifetime are not visible here.
123 HTTPDirectory* _directory;
127 typedef SGSharedPtr<HTTPRepoGetRequest> RepoRequestPtr;
// Private implementation state behind HTTPRepository: the hash cache, the
// per-entry failure list, the outstanding requests and the directory objects.
129 class HTTPRepoPrivate
// One hash-cache record. Code in this file reads and writes the fields
// filePath, hashHex, modTime and lengthBytes.
132 struct HashCacheEntry
134 std::string filePath;
141 typedef std::vector<HashCacheEntry> HashCache;
// Result code stored in a Failure record (the record also carries a path).
147 AbstractRepository::ResultCode error;
150 typedef std::vector<Failure> FailureList;
// Failures collected for individual entries; cleared when update() starts.
151 FailureList failures;
// `parent` is the HTTPRepository that owns this object; status starts out
// as REPO_NO_ERROR.
153 HTTPRepoPrivate(HTTPRepository* parent) :
156 status(AbstractRepository::REPO_NO_ERROR),
162 HTTPRepository* p; // link back to outer
// Repository-wide status, separate from the per-entry `failures` list.
167 AbstractRepository::ResultCode status;
// Directory object for the repository root (created with an empty path).
168 HTTPDirectory* rootDir;
// Running total of bytes from completed downloads.
169 size_t totalDownloaded;
// Create and submit a GET for a single file / for a directory index.
171 HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name,
173 HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash,
// Hash cache: lookup, insert/replace/remove, load, compute, save.
176 std::string hashForPath(const SGPath& p);
177 void updatedFileContents(const SGPath& p, const std::string& newHash);
178 void parseHashCache();
179 std::string computeHashForPath(const SGPath& p);
180 void writeHashCache();
// Failure reporting entry points used by HTTPDirectory.
182 void failedToGetRootIndex(AbstractRepository::ResultCode st);
183 void failedToUpdateChild(const SGPath& relativePath,
184 AbstractRepository::ResultCode fileStatus);
// Outstanding requests; updateFile()/updateDir() append to this vector.
186 typedef std::vector<RepoRequestPtr> RequestVector;
187 RequestVector requests;
// Called by a request from its onDone()/onFail() handlers.
189 void finishedRequest(const RepoRequestPtr& req);
// Directory objects are looked up by their relative path string.
191 HTTPDirectory* getOrCreateDirectory(const std::string& path);
192 bool deleteDirectory(const std::string& path);
// Raw pointers to the directory objects made by getOrCreateDirectory().
194 typedef std::vector<HTTPDirectory*> DirectoryVector;
195 DirectoryVector directories;
// Builds a child entry of kind `ty` from raw index fields. A null `hashData`
// is stored as an empty hash string.
// NOTE(review): how a null `nameData` is treated is not visible here — confirm.
209 ChildInfo(Type ty, const char* nameData, const char* hashData) :
212 hash(hashData ? hashData : ""),
217 ChildInfo(const ChildInfo& other) :
221 sizeInBytes(other.sizeInBytes)
// Parses `sizeData` as a base-10 integer and stores it in sizeInBytes.
// NOTE(review): strtol needs a non-null string, so callers must guard
// against a missing size field before calling this.
224 void setSize(const char* sizeData)
226 sizeInBytes = ::strtol(sizeData, NULL, 10);
// Orders entries by name only; this is the ordering std::sort applies to the
// child list after a directory index is parsed.
229 bool operator<(const ChildInfo& other) const
231 return name < other.name;
235 std::string name, hash;
239 typedef std::vector<ChildInfo> ChildInfoList;
240 ChildInfoList children;
// Creates the directory object for `path` inside `repo`. The visible part of
// the body loads and sorts an index that already exists on disk; an
// sg_exception raised while parsing is caught here.
243 HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
249 SGPath p(absolutePath());
252 // already exists on disk
253 parseDirIndex(children);
254 std::sort(children.begin(), children.end());
255 } catch (sg_exception& e) {
256 // parsing cache failed
262 HTTPRepoPrivate* repository() const
// URL of this directory: the repository base URL for the root (empty
// relative path), otherwise base URL + "/" + relative path.
267 std::string url() const
269 if (_relativePath.str().empty()) {
270 return _repository->baseUrl;
273 return _repository->baseUrl + "/" + _relativePath.str();
// Records `hash` for this directory's .dirindex in the repository hash cache,
// then re-parses the index and re-sorts the child list.
// NOTE(review): the cache key is built from the relative path, while
// hashForChild() queries the cache with an absolute path — confirm the two
// are meant to line up.
// NOTE(review): parseDirIndex() appends to `children`; confirm the list is
// emptied before this re-parse.
276 void dirIndexUpdated(const std::string& hash)
278 SGPath fpath(_relativePath);
279 fpath.append(".dirindex");
280 _repository->updatedFileContents(fpath, hash);
283 parseDirIndex(children);
284 std::sort(children.begin(), children.end());
// Reports that fetching this directory's index failed with `status`. A null
// relative path is handled through failedToGetRootIndex(); the other visible
// call records the failure against this directory via failedToUpdateChild().
287 void failedToUpdate(AbstractRepository::ResultCode status)
289 if (_relativePath.isNull()) {
291 _repository->failedToGetRootIndex(status);
293 _repository->failedToUpdateChild(_relativePath, status);
// Reconciles the files and directories on disk with the parsed index.
// Disk entries missing from the index become orphans, entries whose hash
// differs from the index are scheduled for update, and index names with no
// disk entry are scheduled as new children.
297 void updateChildrenBasedOnHash()
299 //SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
301 string_list indexNames = indexChildren(),
302 toBeUpdated, orphans;
// Enumerate what is actually present on disk for this directory.
303 simgear::Dir d(absolutePath());
304 PathList fsChildren = d.children(0);
305 PathList::const_iterator it = fsChildren.begin();
307 for (; it != fsChildren.end(); ++it) {
308 ChildInfo info(it->isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
309 it->file().c_str(), NULL);
310 std::string hash = hashForChild(info);
// Look the on-disk name up in the index; end() means it is not listed.
312 ChildInfoList::iterator c = findIndexChild(it->file());
313 if (c == children.end()) {
314 orphans.push_back(it->file());
315 } else if (c->hash != hash) {
316 // file exists, but hash mismatch, schedule update
318 //SG_LOG(SG_TERRASYNC, SG_INFO, "file exists but hash is wrong for:" << c->name);
319 //SG_LOG(SG_TERRASYNC, SG_INFO, "on disk:" << hash << " vs in info:" << c->hash);
322 toBeUpdated.push_back(c->name);
324 // file exists and hash is valid. If it's a directory,
325 // perform a recursive check.
326 if (c->type == ChildInfo::DirectoryType) {
327 SGPath p(relativePath());
329 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
330 childDir->updateChildrenBasedOnHash();
// NOTE(review): in the orphan case above `c` equals children.end(); no guard
// is visible before the c->name dereference below — confirm one exists.
// NOTE(review): this `it` shadows the outer loop iterator of the same name.
334 // remove existing file system children from the index list,
335 // so we can detect new children
336 string_list::iterator it = std::find(indexNames.begin(), indexNames.end(), c->name);
337 if (it != indexNames.end()) {
338 indexNames.erase(it);
340 } // of real children iteration
342 // all remaining names in indexNames are new children
343 toBeUpdated.insert(toBeUpdated.end(), indexNames.begin(), indexNames.end());
345 removeOrphans(orphans);
346 scheduleUpdates(toBeUpdated);
349 void removeOrphans(const string_list& orphans)
351 string_list::const_iterator it;
352 for (it = orphans.begin(); it != orphans.end(); ++it) {
// Collects the name of every child listed in the index, in the current
// order of `children`.
357 string_list indexChildren() const
360 r.reserve(children.size());
361 ChildInfoList::const_iterator it;
362 for (it=children.begin(); it != children.end(); ++it) {
363 r.push_back(it->name);
// Issues one request per name: a file request for file children, a directory
// index request (with the expected hash from the index) for directory
// children. Names not present in the index are logged as a warning.
368 void scheduleUpdates(const string_list& names)
370 string_list::const_iterator it;
371 for (it = names.begin(); it != names.end(); ++it) {
372 ChildInfoList::iterator cit = findIndexChild(*it);
373 if (cit == children.end()) {
374 SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, unknown child:" << *it);
378 if (cit->type == ChildInfo::FileType) {
379 _repository->updateFile(this, *it, cit->sizeInBytes);
381 SGPath p(relativePath());
383 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
384 _repository->updateDir(childDir, cit->hash, cit->sizeInBytes);
// File-system location of this directory: the repository base path with the
// relative path appended.
389 SGPath absolutePath() const
391 SGPath r(_repository->basePath);
392 r.append(_relativePath.str());
396 SGPath relativePath() const
398 return _relativePath;
// Called when `file` has been downloaded. `hash` is the hash computed over
// the received data and `sz` is the byte count credited to the download
// total. If the hash matches the index entry the hash cache is updated;
// otherwise a checksum failure is recorded.
401 void didUpdateFile(const std::string& file, const std::string& hash, size_t sz)
403 // check hash matches what we expected
404 ChildInfoList::iterator it = findIndexChild(file);
405 if (it == children.end()) {
406 SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
408 SGPath fpath(_relativePath);
411 if (it->hash != hash) {
// NOTE(review): the failure is recorded against the directory path rather
// than `fpath` — confirm that is intended.
412 _repository->failedToUpdateChild(_relativePath, AbstractRepository::REPO_ERROR_CHECKSUM);
414 _repository->updatedFileContents(fpath, hash);
415 _repository->totalDownloaded += sz;
416 //SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath);
418 } // of found in child list
// Records a failed download with result code `status` against a path built
// from this directory's relative path (presumably with `file` appended on a
// line not shown — confirm).
421 void didFailToUpdateFile(const std::string& file,
422 AbstractRepository::ResultCode status)
424 SGPath fpath(_relativePath);
426 _repository->failedToUpdateChild(fpath, status);
// Predicate for std::find_if: matches the ChildInfo whose name equals the
// name given at construction.
432 ChildWithName(const std::string& n) : name(n) {}
435 bool operator()(const ChildInfo& info) const
436 { return info.name == name; }
// Linear search of the index for a child called `name`; yields
// children.end() when there is no such entry.
439 ChildInfoList::iterator findIndexChild(const std::string& name)
441 return std::find_if(children.begin(), children.end(), ChildWithName(name));
// Reads <absolutePath>/.dirindex line by line and appends one ChildInfo per
// 'f' (file) or 'd' (directory) line to `children`. Fields are ':'-separated:
// type, name, hash, size. Throws sg_io_exception if the file cannot be
// opened or a line is malformed.
// Note: the parameter `children` shadows the member of the same name.
444 bool parseDirIndex(ChildInfoList& children)
446 SGPath p(absolutePath());
447 p.append(".dirindex");
452 std::ifstream indexStream( p.c_str(), std::ios::in );
454 if ( !indexStream.is_open() ) {
455 throw sg_io_exception("cannot open dirIndex file", p);
// NOTE(review): a line of 512 or more characters makes getline set failbit;
// unless the stream state is handled on lines not shown, `!eof()` would
// then never become false.
458 char lineBuffer[512];
461 while (!indexStream.eof() ) {
462 indexStream.getline(lineBuffer, 512);
464 char* typeData = ::strtok_r(lineBuffer, ":", &lastToken);
466 continue; // skip blank line
471 throw sg_io_exception("Malformed dir index file", p);
474 if (!strcmp(typeData, "version")) {
476 } else if (!strcmp(typeData, "path")) {
// Remaining fields of a child line; each may be NULL if the field is absent.
480 char* nameData = ::strtok_r(NULL, ":", &lastToken);
481 char* hashData = ::strtok_r(NULL, ":", &lastToken);
482 char* sizeData = ::strtok_r(NULL, ":", &lastToken);
484 if (typeData[0] == 'f') {
485 children.push_back(ChildInfo(ChildInfo::FileType, nameData, hashData));
486 } else if (typeData[0] == 'd') {
487 children.push_back(ChildInfo(ChildInfo::DirectoryType, nameData, hashData));
489 throw sg_io_exception("Malformed line code in dir index file", p);
493 children.back().setSize(sizeData);
// Removes the child `name` from disk. The visible lines delete directories
// through the repository, drop the child's hash cache entry, and log and
// throw sg_io_exception when removal fails.
500 void removeChild(const std::string& name)
502 SGPath p(absolutePath());
506 SGPath fpath(_relativePath);
510 ok = _repository->deleteDirectory(fpath.str());
512 // remove the hash cache entry
513 _repository->updatedFileContents(fpath, std::string());
518 SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << p);
519 throw sg_io_exception("Failed to remove existing file/dir:", p);
// Hash of `child` as currently on disk. For a directory child the hash is
// that of its .dirindex file; for a file it is the file itself.
523 std::string hashForChild(const ChildInfo& child) const
525 SGPath p(absolutePath());
526 p.append(child.name);
527 if (child.type == ChildInfo::DirectoryType) {
528 p.append(".dirindex");
530 return _repository->hashForPath(p);
533 HTTPRepoPrivate* _repository;
534 SGPath _relativePath; // in URL and file-system space
// Allocates the private implementation and creates the root directory
// object with an empty relative path. How `base` and `cl` are stored is on
// lines not shown here.
539 HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
540 _d(new HTTPRepoPrivate(this))
544 _d->rootDir = new HTTPDirectory(_d.get(), "");
547 HTTPRepository::~HTTPRepository()
551 void HTTPRepository::setBaseUrl(const std::string &url)
556 std::string HTTPRepository::baseUrl() const
561 HTTP::Client* HTTPRepository::http() const
566 SGPath HTTPRepository::fsBase() const
// Starts a sync: resets the status and the failure list, marks the
// repository as updating, and requests the root directory index with no
// expected hash and size 0. The isUpdating check at the top presumably
// guards against starting twice (its body is not shown — confirm).
571 void HTTPRepository::update()
573 if (_d->isUpdating) {
577 _d->status = REPO_NO_ERROR;
578 _d->isUpdating = true;
579 _d->failures.clear();
580 _d->updateDir(_d->rootDir, std::string(), 0);
// Reports whether a sync is in progress. An error status is checked first
// (result of that branch is not shown); otherwise the isUpdating flag is
// returned.
583 bool HTTPRepository::isDoingSync() const
585 if (_d->status != REPO_NO_ERROR) {
589 return _d->isUpdating;
// Sums, over all outstanding requests, the declared content size minus the
// bytes already received.
// NOTE(review): the subtraction is unsigned on the contentSize() side; if a
// response delivers more bytes than the declared size it would wrap — confirm.
592 size_t HTTPRepository::bytesToDownload() const
596 HTTPRepoPrivate::RequestVector::const_iterator r;
597 for (r = _d->requests.begin(); r != _d->requests.end(); ++r) {
598 result += (*r)->contentSize() - (*r)->responseBytesReceived();
// Bytes downloaded so far: the total from completed requests plus the bytes
// received by every request still outstanding.
604 size_t HTTPRepository::bytesDownloaded() const
606 size_t result = _d->totalDownloaded;
608 HTTPRepoPrivate::RequestVector::const_iterator r;
609 for (r = _d->requests.begin(); r != _d->requests.end(); ++r) {
610 result += (*r)->responseBytesReceived();
// Overall result of the last update. When the repository status is
// REPO_NO_ERROR but individual entries failed, REPO_PARTIAL_UPDATE is
// reported; the remaining return path is not shown here.
616 AbstractRepository::ResultCode
617 HTTPRepository::failure() const
619 if ((_d->status == REPO_NO_ERROR) && !_d->failures.empty()) {
620 return REPO_PARTIAL_UPDATE;
// Request that downloads one file of a directory, writes it under the
// directory's absolute path and hashes the received bytes with SHA-1.
626 class FileGetRequest : public HTTPRepoGetRequest
// The request URL is <directory URL>/<file>; the output path is the
// directory's absolute path with the file name appended.
629 FileGetRequest(HTTPDirectory* d, const std::string& file) :
630 HTTPRepoGetRequest(d, makeUrl(d, file)),
633 pathInRepo = _directory->absolutePath();
634 pathInRepo.append(fileName);
635 //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url());
// Body callback: the visible lines create and open the output file (aborting
// the request if that fails), initialise the hash context, and feed each
// chunk into the hash.
639 virtual void gotBodyData(const char* s, int n)
642 file.reset(new SGFile(pathInRepo.str()));
643 if (!file->open(SG_IO_OUT)) {
644 SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo);
645 abort("Unable to create output file");
648 sha1_init(&hashContext);
651 sha1_write(&hashContext, s, n);
// Completion callback: 200 reports the finished file and its hash to the
// directory; 404 and any other code are reported as failures. The request is
// then handed back to the repository.
// NOTE(review): hashContext is initialised in gotBodyData(); confirm a 200
// response cannot reach this point without any body data.
655 virtual void onDone()
658 if (responseCode() == 200) {
659 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
660 _directory->didUpdateFile(fileName, hash, contentSize());
661 //SG_LOG(SG_TERRASYNC, SG_INFO, "got file " << fileName << " in " << _directory->absolutePath());
662 } else if (responseCode() == 404) {
663 _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
665 _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_HTTP);
668 _directory->repository()->finishedRequest(this);
// Transport failure: reported to the directory as REPO_ERROR_SOCKET. The
// handling of an existing output file is on lines not shown here.
671 virtual void onFail()
674 if (pathInRepo.exists()) {
679 _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_SOCKET);
680 _directory->repository()->finishedRequest(this);
// Builds the request URL for `file` inside directory `d`.
684 static std::string makeUrl(HTTPDirectory* d, const std::string& file)
686 return d->url() + "/" + file;
689 std::string fileName; // if empty, we're getting the directory itself
691 simgear::sha1nfo hashContext;
// NOTE(review): std::auto_ptr was deprecated in C++11 and removed in C++17.
692 std::auto_ptr<SGFile> file;
// Request that downloads a directory's .dirindex, verifies it against an
// optional expected hash, writes it to disk when it changed, and then
// triggers reconciliation of the directory's children.
695 class DirGetRequest : public HTTPRepoGetRequest
// `targetHash` is the hash the index is expected to have; an empty string
// disables that check (see onDone).
698 DirGetRequest(HTTPDirectory* d, const std::string& targetHash) :
699 HTTPRepoGetRequest(d, makeUrl(d)),
701 _targetHash(targetHash)
703 sha1_init(&hashContext);
704 //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url());
712 bool isRootDir() const
// Body callback: accumulates the index text in `body` and hashes each chunk.
718 virtual void gotBodyData(const char* s, int n)
720 body += std::string(s, n);
721 sha1_write(&hashContext, s, n);
// Completion callback. On 200: fail with REPO_ERROR_CHECKSUM if the hash
// differs from a non-empty target hash; otherwise, if the hash differs from
// the cached one, create the directory, write the new index and notify the
// HTTPDirectory; then reconcile children. An sg_exception on this path is
// reported as REPO_ERROR_IO. 404 and other codes are reported as failures.
724 virtual void onDone()
726 if (responseCode() == 200) {
727 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
728 if (!_targetHash.empty() && (hash != _targetHash)) {
729 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_CHECKSUM);
730 _directory->repository()->finishedRequest(this);
734 std::string curHash = _directory->repository()->hashForPath(path());
735 if (hash != curHash) {
736 simgear::Dir d(_directory->absolutePath());
738 if (!d.create(0700)) {
739 throw sg_io_exception("Unable to create directory", d.path());
743 // dir index data has changed, so write to disk and update
744 // the hash accordingly
745 std::ofstream of(pathInRepo().c_str(), std::ios::trunc | std::ios::out);
747 throw sg_io_exception("Failed to open directory index file for writing", pathInRepo().c_str());
750 of.write(body.data(), body.size());
752 _directory->dirIndexUpdated(hash);
754 //SG_LOG(SG_TERRASYNC, SG_INFO, "updated dir index " << _directory->absolutePath());
757 _directory->repository()->totalDownloaded += contentSize();
760 // either way we've confirmed the index is valid so update
762 _directory->updateChildrenBasedOnHash();
763 } catch (sg_exception& e) {
764 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_IO);
766 } else if (responseCode() == 404) {
767 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
769 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_HTTP);
772 _directory->repository()->finishedRequest(this);
// Transport failure: reported as REPO_ERROR_SOCKET, then the request is
// handed back to the repository.
775 virtual void onFail()
778 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_SOCKET);
779 _directory->repository()->finishedRequest(this);
// URL of the index file of directory `d`.
783 static std::string makeUrl(HTTPDirectory* d)
785 return d->url() + "/.dirindex";
// On-disk location of the index file for this request's directory.
788 SGPath pathInRepo() const
790 SGPath p(_directory->absolutePath());
791 p.append(".dirindex");
795 simgear::sha1nfo hashContext;
797 bool _isRootDir; ///< is this the repository root?
798 std::string _targetHash;
801 HTTPRepoPrivate::~HTTPRepoPrivate()
803 DirectoryVector::iterator it;
804 for (it=directories.begin(); it != directories.end(); ++it) {
808 RequestVector::iterator r;
809 for (r=requests.begin(); r != requests.end(); ++r) {
// Creates a file request for `name` in `dir`, records `sz` as its content
// size, adds it to the outstanding requests and submits it to the HTTP client.
814 HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name, size_t sz)
816 RepoRequestPtr r(new FileGetRequest(dir, name));
817 r->setContentSize(sz);
818 requests.push_back(r);
819 http->makeRequest(r);
// Creates a directory-index request for `dir` with expected hash `hash`,
// records `sz` as its content size, adds it to the outstanding requests and
// submits it to the HTTP client.
823 HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash, size_t sz)
825 RepoRequestPtr r(new DirGetRequest(dir, hash));
826 r->setContentSize(sz);
827 requests.push_back(r);
828 http->makeRequest(r);
// Predicate for std::find_if over the hash cache: matches the entry whose
// filePath equals the path string given at construction.
833 class HashEntryWithPath
836 HashEntryWithPath(const std::string& p) : path(p) {}
837 bool operator()(const HTTPRepoPrivate::HashCacheEntry& entry) const
838 { return entry.filePath == path; }
// Hash lookup for `p`. A cached entry is considered valid while the file's
// size and modification time still equal the cached values; on a miss or a
// stale entry the hash is recomputed and stored via updatedFileContents().
843 std::string HTTPRepoPrivate::hashForPath(const SGPath& p)
845 HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
846 if (it != hashes.end()) {
847 // ensure data on disk hasn't changed.
848 // we could also use the file type here if we were paranoid
849 if ((p.sizeInBytes() == it->lengthBytes) && (p.modTime() == it->modTime)) {
853 // entry in the cache, but it's stale so remove and fall through
857 std::string hash = computeHashForPath(p);
858 updatedFileContents(p, hash);
// Computes the SHA-1 of the file at `p` by reading it in 1 MiB chunks and
// returns it hex-encoded. An early exit returns an empty string (its
// condition is on a line not shown). Throws sg_io_exception if the file
// cannot be opened for reading.
862 std::string HTTPRepoPrivate::computeHashForPath(const SGPath& p)
865 return std::string();
// NOTE(review): the malloc result is not visibly null-checked, and the throw
// below leaves the function with no visible free — confirm the buffer is
// released on every path.
868 char* buf = static_cast<char*>(malloc(1024 * 1024));
871 if (!f.open(SG_IO_IN)) {
872 throw sg_io_exception("Couldn't open file for compute hash", p);
874 while ((readLen = f.read(buf, 1024 * 1024)) > 0) {
875 sha1_write(&info, buf, readLen);
880 std::string hashBytes((char*) sha1_result(&info), HASH_LENGTH);
881 return strutils::encodeHex(hashBytes);
// Replaces the hash cache entry for `p`. An empty `newHash` means "remove
// only". Otherwise a new entry is appended carrying the new hash together
// with the file's current modification time and size.
884 void HTTPRepoPrivate::updatedFileContents(const SGPath& p, const std::string& newHash)
886 // remove the existing entry
887 HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
888 if (it != hashes.end()) {
892 if (newHash.empty()) {
893 return; // we're done
896 // use a cloned SGPath and reset its caching to force one stat() call
898 p2.set_cached(false);
901 HashCacheEntry entry;
902 entry.filePath = p.str();
903 entry.hashHex = newHash;
904 entry.modTime = p2.modTime();
905 entry.lengthBytes = p2.sizeInBytes();
906 hashes.push_back(entry);
// Writes the hash cache to <basePath>/.hashes, truncating any previous file.
// Each line is path:modTime:lengthBytes:hashHex.
// NOTE(review): ':' is also the field separator used when the file is read
// back; a path containing ':' (e.g. a drive letter) would not round-trip —
// confirm.
911 void HTTPRepoPrivate::writeHashCache()
913 SGPath cachePath = basePath;
914 cachePath.append(".hashes");
916 std::ofstream stream(cachePath.c_str(),std::ios::out | std::ios::trunc);
917 HashCache::const_iterator it;
918 for (it = hashes.begin(); it != hashes.end(); ++it) {
919 stream << it->filePath << ":" << it->modTime << ":"
920 << it->lengthBytes << ":" << it->hashHex << "\n";
// Loads <basePath>/.hashes into `hashes`. Each line is split on ':' into
// path, modification time, size and hex hash; the time and size are parsed
// as base-10 integers. Lines with any field missing are detected by the
// null check (what happens to them is on a line not shown).
925 void HTTPRepoPrivate::parseHashCache()
928 SGPath cachePath = basePath;
929 cachePath.append(".hashes");
930 if (!cachePath.exists()) {
934 std::ifstream stream(cachePath.c_str(), std::ios::in);
// NOTE(review): same eof()-driven getline pattern as the directory index
// parser; a line of 2048 or more characters would set failbit — confirm
// the stream state is handled.
938 while (!stream.eof()) {
939 stream.getline(buf, 2048);
941 char* nameData = ::strtok_r(buf, ":", &lastToken);
942 char* timeData = ::strtok_r(NULL, ":", &lastToken);
943 char* sizeData = ::strtok_r(NULL, ":", &lastToken);
944 char* hashData = ::strtok_r(NULL, ":", &lastToken);
945 if (!nameData || !timeData || !sizeData || !hashData) {
949 HashCacheEntry entry;
950 entry.filePath = nameData;
951 entry.hashHex = hashData;
952 entry.modTime = strtol(timeData, NULL, 10);
953 entry.lengthBytes = strtol(sizeData, NULL, 10);
954 hashes.push_back(entry);
// Predicate for std::find_if over the directory list: matches the
// HTTPDirectory whose relative path string equals the given path.
958 class DirectoryWithPath
961 DirectoryWithPath(const std::string& p) : path(p) {}
962 bool operator()(const HTTPDirectory* entry) const
963 { return entry->relativePath().str() == path; }
// Finds the directory object registered for relative path `path`; when none
// exists a new HTTPDirectory is allocated and added to `directories`.
968 HTTPDirectory* HTTPRepoPrivate::getOrCreateDirectory(const std::string& path)
970 DirectoryWithPath p(path);
971 DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
972 if (it != directories.end()) {
976 HTTPDirectory* d = new HTTPDirectory(this, path);
977 directories.push_back(d);
// Unregisters the directory object for `path`, recursively removes the
// directory from disk and drops the hash cache entry keyed by `path`.
// NOTE(review): `d` is taken out of `directories`; the line that frees the
// object is not visible here — confirm it is deleted.
981 bool HTTPRepoPrivate::deleteDirectory(const std::string& path)
983 DirectoryWithPath p(path);
984 DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
985 if (it != directories.end()) {
986 HTTPDirectory* d = *it;
987 directories.erase(it);
988 Dir dir(d->absolutePath());
989 bool result = dir.remove(true);
992 // update the hash cache too
993 updatedFileContents(path, std::string());
// Called when `req` has completed. Throws sg_exception if the request is not
// in the outstanding list. The check for an empty list presumably marks the
// end of the update (its body is not shown — confirm).
1001 void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req)
1003 RequestVector::iterator it = std::find(requests.begin(), requests.end(), req);
1004 if (it == requests.end()) {
1005 throw sg_exception("lost request somehow", req->url());
1008 if (requests.empty()) {
1013 void HTTPRepoPrivate::failedToGetRootIndex(AbstractRepository::ResultCode st)
1015 SG_LOG(SG_TERRASYNC, SG_WARN, "Failed to get root of repo:" << baseUrl);
// Appends a Failure record (path + result code) to `failures` and logs a
// warning naming the entry and the code.
1019 void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath,
1020 AbstractRepository::ResultCode fileStatus)
1023 f.path = relativePath;
1024 f.error = fileStatus;
1025 failures.push_back(f);
1027 SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " code:" << fileStatus);
1032 } // of namespace simgear