1 // HTTPRepository.cxx -- plain HTTP TerraSync remote client
3 // Copyright (C) 2016 James Turner <zakalawe@mac.com>
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 2 of the
8 // License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 #include "HTTPRepository.hxx"
21 #include <simgear_config.h>
36 #include "simgear/debug/logstream.hxx"
37 #include "simgear/misc/strutils.hxx"
38 #include <simgear/misc/sg_dir.hxx>
39 #include <simgear/io/HTTPClient.hxx>
40 #include <simgear/io/sg_file.hxx>
41 #include <simgear/misc/sgstream.hxx>
42 #include <simgear/structure/exception.hxx>
44 #include <simgear/misc/sg_hash.hxx>
46 #if defined(SG_WINDOWS)
49 * public domain strtok_r() by Charlie Gordon
51 * from comp.lang.c 9/14/2007
53 * http://groups.google.com/group/comp.lang.c/msg/2ab1ecbb86646684
55 * (Declaration that it's public domain):
56 * http://groups.google.com/group/comp.lang.c/msg/7c7b39328fefab9c
71 str += strspn(str, delim);
80 str += strcspn(str, delim);
98 class HTTPRepoGetRequest : public HTTP::Request
101 HTTPRepoGetRequest(HTTPDirectory* d, const std::string& u) :
107 virtual void cancel()
110 abort("Repository cancelled request");
113 HTTPDirectory* _directory;
116 typedef SGSharedPtr<HTTPRepoGetRequest> RepoRequestPtr;
118 class HTTPRepoPrivate
121 struct HashCacheEntry
123 std::string filePath;
130 typedef std::vector<HashCacheEntry> HashCache;
136 AbstractRepository::ResultCode error;
139 typedef std::vector<Failure> FailureList;
140 FailureList failures;
142 HTTPRepoPrivate(HTTPRepository* parent) :
145 status(AbstractRepository::REPO_NO_ERROR)
150 HTTPRepository* p; // link back to outer
155 AbstractRepository::ResultCode status;
156 HTTPDirectory* rootDir;
158 HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name);
159 HTTP::Request_ptr updateDir(HTTPDirectory* dir, const std::string& hash);
161 std::string hashForPath(const SGPath& p);
162 void updatedFileContents(const SGPath& p, const std::string& newHash);
163 void parseHashCache();
164 std::string computeHashForPath(const SGPath& p);
165 void writeHashCache();
167 void failedToGetRootIndex(AbstractRepository::ResultCode st);
168 void failedToUpdateChild(const SGPath& relativePath,
169 AbstractRepository::ResultCode fileStatus);
171 typedef std::vector<RepoRequestPtr> RequestVector;
172 RequestVector requests;
174 void finishedRequest(const RepoRequestPtr& req);
176 HTTPDirectory* getOrCreateDirectory(const std::string& path);
177 bool deleteDirectory(const std::string& path);
179 typedef std::vector<HTTPDirectory*> DirectoryVector;
180 DirectoryVector directories;
194 ChildInfo(Type ty, const char* nameData, const char* hashData) :
197 hash(hashData ? hashData : ""),
202 ChildInfo(const ChildInfo& other) :
206 sizeInBytes(other.sizeInBytes)
209 void setSize(const char* sizeData)
211 sizeInBytes = ::strtol(sizeData, NULL, 10);
214 bool operator<(const ChildInfo& other) const
216 return name < other.name;
220 std::string name, hash;
224 typedef std::vector<ChildInfo> ChildInfoList;
225 ChildInfoList children;
228 HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
234 SGPath p(absolutePath());
237 // already exists on disk
238 bool ok = parseDirIndex(children);
239 std::sort(children.begin(), children.end());
240 } catch (sg_exception& e) {
241 // parsing cache failed
247 HTTPRepoPrivate* repository() const
252 std::string url() const
254 if (_relativePath.str().empty()) {
255 return _repository->baseUrl;
258 return _repository->baseUrl + "/" + _relativePath.str();
261 void dirIndexUpdated(const std::string& hash)
263 SGPath fpath(_relativePath);
264 fpath.append(".dirindex");
265 _repository->updatedFileContents(fpath, hash);
268 parseDirIndex(children);
269 std::sort(children.begin(), children.end());
272 void failedToUpdate(AbstractRepository::ResultCode status)
274 if (_relativePath.isNull()) {
276 _repository->failedToGetRootIndex(status);
278 _repository->failedToUpdateChild(_relativePath, status);
282 void updateChildrenBasedOnHash()
284 //SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
286 string_list indexNames = indexChildren(),
287 toBeUpdated, orphans;
288 simgear::Dir d(absolutePath());
289 PathList fsChildren = d.children(0);
290 PathList::const_iterator it = fsChildren.begin();
292 for (; it != fsChildren.end(); ++it) {
293 ChildInfo info(it->isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
294 it->file().c_str(), NULL);
295 std::string hash = hashForChild(info);
297 ChildInfoList::iterator c = findIndexChild(it->file());
298 if (c == children.end()) {
299 orphans.push_back(it->file());
300 } else if (c->hash != hash) {
301 // file exists, but hash mismatch, schedule update
303 //SG_LOG(SG_TERRASYNC, SG_INFO, "file exists but hash is wrong for:" << c->name);
304 //SG_LOG(SG_TERRASYNC, SG_INFO, "on disk:" << hash << " vs in info:" << c->hash);
307 toBeUpdated.push_back(c->name);
309 // file exists and hash is valid. If it's a directory,
310 // perform a recursive check.
311 if (c->type == ChildInfo::DirectoryType) {
312 SGPath p(relativePath());
314 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
315 childDir->updateChildrenBasedOnHash();
319 // remove existing file system children from the index list,
320 // so we can detect new children
321 string_list::iterator it = std::find(indexNames.begin(), indexNames.end(), c->name);
322 if (it != indexNames.end()) {
323 indexNames.erase(it);
325 } // of real children iteration
327 // all remaining names in indexChilden are new children
328 toBeUpdated.insert(toBeUpdated.end(), indexNames.begin(), indexNames.end());
330 removeOrphans(orphans);
331 scheduleUpdates(toBeUpdated);
334 void removeOrphans(const string_list& orphans)
336 string_list::const_iterator it;
337 for (it = orphans.begin(); it != orphans.end(); ++it) {
342 string_list indexChildren() const
345 r.reserve(children.size());
346 ChildInfoList::const_iterator it;
347 for (it=children.begin(); it != children.end(); ++it) {
348 r.push_back(it->name);
353 void scheduleUpdates(const string_list& names)
355 string_list::const_iterator it;
356 for (it = names.begin(); it != names.end(); ++it) {
357 ChildInfoList::iterator cit = findIndexChild(*it);
358 if (cit == children.end()) {
359 SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, unknown child:" << *it);
363 if (cit->type == ChildInfo::FileType) {
364 _repository->updateFile(this, *it);
366 SGPath p(relativePath());
368 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
369 _repository->updateDir(childDir, cit->hash);
374 SGPath absolutePath() const
376 SGPath r(_repository->basePath);
377 r.append(_relativePath.str());
381 SGPath relativePath() const
383 return _relativePath;
386 void didUpdateFile(const std::string& file, const std::string& hash)
388 // check hash matches what we expected
389 ChildInfoList::iterator it = findIndexChild(file);
390 if (it == children.end()) {
391 SG_LOG(SG_TERRASYNC, SG_WARN, "updated file but not found in dir:" << _relativePath << " " << file);
393 SGPath fpath(_relativePath);
396 if (it->hash != hash) {
397 _repository->failedToUpdateChild(_relativePath, AbstractRepository::REPO_ERROR_CHECKSUM);
399 _repository->updatedFileContents(fpath, hash);
400 //SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath);
402 } // of found in child list
405 void didFailToUpdateFile(const std::string& file,
406 AbstractRepository::ResultCode status)
408 SGPath fpath(_relativePath);
410 _repository->failedToUpdateChild(fpath, status);
416 ChildWithName(const std::string& n) : name(n) {}
419 bool operator()(const ChildInfo& info) const
420 { return info.name == name; }
423 ChildInfoList::iterator findIndexChild(const std::string& name)
425 return std::find_if(children.begin(), children.end(), ChildWithName(name));
428 bool parseDirIndex(ChildInfoList& children)
430 SGPath p(absolutePath());
431 p.append(".dirindex");
436 std::ifstream indexStream( p.c_str(), std::ios::in );
438 if ( !indexStream.is_open() ) {
439 throw sg_io_exception("cannot open dirIndex file", p);
442 char lineBuffer[512];
445 while (!indexStream.eof() ) {
446 indexStream.getline(lineBuffer, 512);
448 char* typeData = ::strtok_r(lineBuffer, ":", &lastToken);
450 continue; // skip blank line
455 throw sg_io_exception("Malformed dir index file", p);
458 if (!strcmp(typeData, "version")) {
460 } else if (!strcmp(typeData, "path")) {
464 char* nameData = ::strtok_r(NULL, ":", &lastToken);
465 char* hashData = ::strtok_r(NULL, ":", &lastToken);
466 char* sizeData = ::strtok_r(NULL, ":", &lastToken);
468 if (typeData[0] == 'f') {
469 children.push_back(ChildInfo(ChildInfo::FileType, nameData, hashData));
470 } else if (typeData[0] == 'd') {
471 children.push_back(ChildInfo(ChildInfo::DirectoryType, nameData, hashData));
473 throw sg_io_exception("Malformed line code in dir index file", p);
477 children.back().setSize(sizeData);
484 void removeChild(const std::string& name)
486 SGPath p(absolutePath());
490 SGPath fpath(_relativePath);
494 ok = _repository->deleteDirectory(fpath.str());
496 // remove the hash cache entry
497 _repository->updatedFileContents(fpath, std::string());
502 SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << p);
503 throw sg_io_exception("Failed to remove existing file/dir:", p);
507 std::string hashForChild(const ChildInfo& child) const
509 SGPath p(absolutePath());
510 p.append(child.name);
511 if (child.type == ChildInfo::DirectoryType) {
512 p.append(".dirindex");
514 return _repository->hashForPath(p);
517 HTTPRepoPrivate* _repository;
518 SGPath _relativePath; // in URL and file-system space
523 HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
524 _d(new HTTPRepoPrivate(this))
528 _d->rootDir = new HTTPDirectory(_d.get(), "");
531 HTTPRepository::~HTTPRepository()
535 void HTTPRepository::setBaseUrl(const std::string &url)
540 std::string HTTPRepository::baseUrl() const
545 HTTP::Client* HTTPRepository::http() const
550 SGPath HTTPRepository::fsBase() const
555 void HTTPRepository::update()
557 if (_d->isUpdating) {
561 _d->status = REPO_NO_ERROR;
562 _d->isUpdating = true;
563 _d->failures.clear();
564 _d->updateDir(_d->rootDir, std::string());
567 bool HTTPRepository::isDoingSync() const
569 if (_d->status != REPO_NO_ERROR) {
573 return _d->isUpdating;
576 AbstractRepository::ResultCode
577 HTTPRepository::failure() const
579 if ((_d->status == REPO_NO_ERROR) && !_d->failures.empty()) {
580 return REPO_PARTIAL_UPDATE;
586 class FileGetRequest : public HTTPRepoGetRequest
589 FileGetRequest(HTTPDirectory* d, const std::string& file) :
590 HTTPRepoGetRequest(d, makeUrl(d, file)),
593 pathInRepo = _directory->absolutePath();
594 pathInRepo.append(fileName);
595 //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url());
599 virtual void gotBodyData(const char* s, int n)
602 file.reset(new SGFile(pathInRepo.str()));
603 if (!file->open(SG_IO_OUT)) {
604 SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << pathInRepo);
605 abort("Unable to create output file");
608 sha1_init(&hashContext);
611 sha1_write(&hashContext, s, n);
615 virtual void onDone()
618 if (responseCode() == 200) {
619 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
620 _directory->didUpdateFile(fileName, hash);
621 //SG_LOG(SG_TERRASYNC, SG_INFO, "got file " << fileName << " in " << _directory->absolutePath());
622 } else if (responseCode() == 404) {
623 _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
625 _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_HTTP);
628 _directory->repository()->finishedRequest(this);
631 virtual void onFail()
636 _directory->didFailToUpdateFile(fileName, AbstractRepository::REPO_ERROR_SOCKET);
637 _directory->repository()->finishedRequest(this);
641 static std::string makeUrl(HTTPDirectory* d, const std::string& file)
643 return d->url() + "/" + file;
646 std::string fileName; // if empty, we're getting the directory itself
648 simgear::sha1nfo hashContext;
649 std::auto_ptr<SGFile> file;
652 class DirGetRequest : public HTTPRepoGetRequest
655 DirGetRequest(HTTPDirectory* d, const std::string& targetHash) :
656 HTTPRepoGetRequest(d, makeUrl(d)),
658 _targetHash(targetHash)
660 sha1_init(&hashContext);
661 //SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url());
669 bool isRootDir() const
675 virtual void gotBodyData(const char* s, int n)
677 body += std::string(s, n);
678 sha1_write(&hashContext, s, n);
681 virtual void onDone()
683 if (responseCode() == 200) {
684 std::string hash = strutils::encodeHex(sha1_result(&hashContext), HASH_LENGTH);
685 if (!_targetHash.empty() && (hash != _targetHash)) {
686 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_CHECKSUM);
687 _directory->repository()->finishedRequest(this);
691 std::string curHash = _directory->repository()->hashForPath(path());
692 if (hash != curHash) {
693 simgear::Dir d(_directory->absolutePath());
695 if (!d.create(0700)) {
696 throw sg_io_exception("Unable to create directory", d.path());
700 // dir index data has changed, so write to disk and update
701 // the hash accordingly
702 std::ofstream of(pathInRepo().c_str(), std::ios::trunc | std::ios::out);
704 throw sg_io_exception("Failed to open directory index file for writing", pathInRepo().c_str());
707 of.write(body.data(), body.size());
709 _directory->dirIndexUpdated(hash);
711 //SG_LOG(SG_TERRASYNC, SG_INFO, "updated dir index " << _directory->absolutePath());
715 // either way we've confirmed the index is valid so update
717 _directory->updateChildrenBasedOnHash();
718 } catch (sg_exception& e) {
719 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_IO);
721 } else if (responseCode() == 404) {
722 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_FILE_NOT_FOUND);
724 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_HTTP);
727 _directory->repository()->finishedRequest(this);
730 virtual void onFail()
733 _directory->failedToUpdate(AbstractRepository::REPO_ERROR_SOCKET);
734 _directory->repository()->finishedRequest(this);
738 static std::string makeUrl(HTTPDirectory* d)
740 return d->url() + "/.dirindex";
743 SGPath pathInRepo() const
745 SGPath p(_directory->absolutePath());
746 p.append(".dirindex");
750 simgear::sha1nfo hashContext;
752 bool _isRootDir; ///< is this the repository root?
753 std::string _targetHash;
756 HTTPRepoPrivate::~HTTPRepoPrivate()
758 DirectoryVector::iterator it;
759 for (it=directories.begin(); it != directories.end(); ++it) {
763 RequestVector::iterator r;
764 for (r=requests.begin(); r != requests.end(); ++r) {
769 HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name)
771 RepoRequestPtr r(new FileGetRequest(dir, name));
772 requests.push_back(r);
773 http->makeRequest(r);
777 HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir, const std::string& hash)
779 RepoRequestPtr r(new DirGetRequest(dir, hash));
780 requests.push_back(r);
781 http->makeRequest(r);
786 class HashEntryWithPath
789 HashEntryWithPath(const std::string& p) : path(p) {}
790 bool operator()(const HTTPRepoPrivate::HashCacheEntry& entry) const
791 { return entry.filePath == path; }
796 std::string HTTPRepoPrivate::hashForPath(const SGPath& p)
798 HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
799 if (it != hashes.end()) {
800 // ensure data on disk hasn't changed.
801 // we could also use the file type here if we were paranoid
802 if ((p.sizeInBytes() == it->lengthBytes) && (p.modTime() == it->modTime)) {
806 // entry in the cache, but it's stale so remove and fall through
810 std::string hash = computeHashForPath(p);
811 updatedFileContents(p, hash);
815 std::string HTTPRepoPrivate::computeHashForPath(const SGPath& p)
818 return std::string();
821 char* buf = static_cast<char*>(malloc(1024 * 1024));
824 if (!f.open(SG_IO_IN)) {
825 throw sg_io_exception("Couldn't open file for compute hash", p);
827 while ((readLen = f.read(buf, 1024 * 1024)) > 0) {
828 sha1_write(&info, buf, readLen);
833 std::string hashBytes((char*) sha1_result(&info), HASH_LENGTH);
834 return strutils::encodeHex(hashBytes);
837 void HTTPRepoPrivate::updatedFileContents(const SGPath& p, const std::string& newHash)
839 // remove the existing entry
840 HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
841 if (it != hashes.end()) {
845 if (newHash.empty()) {
846 return; // we're done
849 // use a cloned SGPath and reset its caching to force one stat() call
851 p2.set_cached(false);
854 HashCacheEntry entry;
855 entry.filePath = p.str();
856 entry.hashHex = newHash;
857 entry.modTime = p2.modTime();
858 entry.lengthBytes = p2.sizeInBytes();
859 hashes.push_back(entry);
864 void HTTPRepoPrivate::writeHashCache()
866 SGPath cachePath = basePath;
867 cachePath.append(".hashes");
869 std::ofstream stream(cachePath.c_str(),std::ios::out | std::ios::trunc);
870 HashCache::const_iterator it;
871 for (it = hashes.begin(); it != hashes.end(); ++it) {
872 stream << it->filePath << ":" << it->modTime << ":"
873 << it->lengthBytes << ":" << it->hashHex << "\n";
878 void HTTPRepoPrivate::parseHashCache()
881 SGPath cachePath = basePath;
882 cachePath.append(".hashes");
883 if (!cachePath.exists()) {
887 std::ifstream stream(cachePath.c_str(), std::ios::in);
891 while (!stream.eof()) {
892 stream.getline(buf, 2048);
894 char* nameData = ::strtok_r(buf, ":", &lastToken);
895 char* timeData = ::strtok_r(NULL, ":", &lastToken);
896 char* sizeData = ::strtok_r(NULL, ":", &lastToken);
897 char* hashData = ::strtok_r(NULL, ":", &lastToken);
898 if (!nameData || !timeData || !sizeData || !hashData) {
902 HashCacheEntry entry;
903 entry.filePath = nameData;
904 entry.hashHex = hashData;
905 entry.modTime = strtol(timeData, NULL, 10);
906 entry.lengthBytes = strtol(sizeData, NULL, 10);
907 hashes.push_back(entry);
911 class DirectoryWithPath
914 DirectoryWithPath(const std::string& p) : path(p) {}
915 bool operator()(const HTTPDirectory* entry) const
916 { return entry->relativePath().str() == path; }
921 HTTPDirectory* HTTPRepoPrivate::getOrCreateDirectory(const std::string& path)
923 DirectoryWithPath p(path);
924 DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
925 if (it != directories.end()) {
929 HTTPDirectory* d = new HTTPDirectory(this, path);
930 directories.push_back(d);
934 bool HTTPRepoPrivate::deleteDirectory(const std::string& path)
936 DirectoryWithPath p(path);
937 DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
938 if (it != directories.end()) {
939 HTTPDirectory* d = *it;
940 directories.erase(it);
941 Dir dir(d->absolutePath());
942 bool result = dir.remove(true);
945 // update the hash cache too
946 updatedFileContents(path, std::string());
954 void HTTPRepoPrivate::finishedRequest(const RepoRequestPtr& req)
956 RequestVector::iterator it = std::find(requests.begin(), requests.end(), req);
957 if (it == requests.end()) {
958 throw sg_exception("lost request somehow", req->url());
961 if (requests.empty()) {
966 void HTTPRepoPrivate::failedToGetRootIndex(AbstractRepository::ResultCode st)
968 SG_LOG(SG_TERRASYNC, SG_WARN, "Failed to get root of repo:" << baseUrl);
972 void HTTPRepoPrivate::failedToUpdateChild(const SGPath& relativePath,
973 AbstractRepository::ResultCode fileStatus)
976 f.path = relativePath;
977 f.error = fileStatus;
978 failures.push_back(f);
980 SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update entry:" << relativePath << " code:" << fileStatus);
985 } // of namespace simgear