1 // HTTPRepository.cxx -- plain HTTP TerraSync remote client
3 // Copyright (C) 2016 James Turner <zakalawe@mac.com>
5 // This program is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU General Public License as
7 // published by the Free Software Foundation; either version 2 of the
8 // License, or (at your option) any later version.
10 // This program is distributed in the hope that it will be useful, but
11 // WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 #include "HTTPRepository.hxx"
34 #include "simgear/debug/logstream.hxx"
35 #include "simgear/misc/strutils.hxx"
36 #include <simgear/misc/sg_dir.hxx>
37 #include <simgear/io/HTTPClient.hxx>
38 #include <simgear/io/sg_file.hxx>
39 #include <simgear/misc/sgstream.hxx>
40 #include <simgear/structure/exception.hxx>
42 #include <simgear/misc/sg_hash.hxx>
61 typedef std::vector<HashCacheEntry> HashCache;
64 HTTPRepoPrivate(HTTPRepository* parent) :
67 status(AbstractRepository::REPO_NO_ERROR)
70 HTTPRepository* p; // link back to outer
75 AbstractRepository::ResultCode status;
76 HTTPDirectory* rootDir;
78 HTTP::Request_ptr updateFile(HTTPDirectory* dir, const std::string& name);
79 HTTP::Request_ptr updateDir(HTTPDirectory* dir);
81 std::string hashForPath(const SGPath& p);
82 void updatedFileContents(const SGPath& p, const std::string& newHash);
83 void parseHashCache();
84 std::string computeHashForPath(const SGPath& p);
85 void writeHashCache();
87 void failedToGetRootIndex();
89 typedef std::vector<HTTP::Request_ptr> RequestVector;
90 RequestVector requests;
92 void finishedRequest(const HTTP::Request_ptr& req);
94 HTTPDirectory* getOrCreateDirectory(const std::string& path);
95 bool deleteDirectory(const std::string& path);
97 typedef std::vector<HTTPDirectory*> DirectoryVector;
98 DirectoryVector directories;
112 ChildInfo(Type ty, const char* nameData, const char* hashData) :
115 hash(hashData ? hashData : ""),
120 ChildInfo(const ChildInfo& other) :
124 sizeInBytes(other.sizeInBytes)
127 void setSize(const char* sizeData)
129 sizeInBytes = ::strtol(sizeData, NULL, 10);
132 bool operator<(const ChildInfo& other) const
134 return name < other.name;
138 std::string name, hash;
142 typedef std::vector<ChildInfo> ChildInfoList;
143 ChildInfoList children;
// Construct the directory object for 'path' inside repository 'repo'.
// From the statements visible here: the absolute on-disk path is computed,
// an existing .dirindex is parsed into 'children' and the list is sorted by
// name; an sg_exception raised while parsing is caught.
// NOTE(review): the initialiser list, the condition guarding the parse and
// the catch body are elided in this view — confirm how a failed parse is
// handled.
146 HTTPDirectory(HTTPRepoPrivate* repo, const std::string& path) :
150 SGPath p(absolutePath());
152 // what is indexValid for?
153 // bool indexValid = false;
155 // already exists on disk
156 parseDirIndex(children);
157 // indexValid = true;
158 std::sort(children.begin(), children.end());
159 } catch (sg_exception& e) {
160 // parsing cache failed
166 HTTPRepoPrivate* repository() const
171 std::string url() const
173 if (_relativePath.str().empty()) {
174 return _repository->baseUrl;
177 return _repository->baseUrl + "/" + _relativePath.str();
// Called (from DirGetRequest::onDone) after a changed index for this
// directory has been written to disk: records the new hash of the
// .dirindex file in the repository hash cache, then parses the index into
// 'children' and sorts the list.
// NOTE(review): lines are elided between the cache update and the parse;
// the visible part of parseDirIndex() only appends, so presumably
// 'children' is cleared there — confirm.
// NOTE(review): the path recorded here is built from _relativePath, whereas
// hashForChild() queries the cache with absolutePath()-based paths — verify
// that both sides use the same key form.
180 void dirIndexUpdated(const std::string& hash)
182 SGPath fpath(_relativePath);
183 fpath.append(".dirindex");
184 _repository->updatedFileContents(fpath, hash);
187 parseDirIndex(children);
188 std::sort(children.begin(), children.end());
// Called from DirGetRequest::onDone when the index of this directory could
// not be fetched.
191 void failedToUpdate()
// presumably a null relative path marks the repository root; that failure
// is reported to the repository itself
193 if (_relativePath.isNull()) {
195 _repository->failedToGetRootIndex();
// NOTE(review): the branch structure around this warning is elided in this
// view; presumably it is the non-root case — confirm
197 SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update dir:" << _relativePath);
201 void updateChildrenBasedOnHash()
203 SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated children for:" << relativePath());
205 string_list indexNames = indexChildren(),
206 toBeUpdated, orphans;
207 simgear::Dir d(absolutePath());
208 PathList fsChildren = d.children(0);
209 PathList::const_iterator it = fsChildren.begin();
211 for (; it != fsChildren.end(); ++it) {
212 ChildInfo info(it->isDir() ? ChildInfo::DirectoryType : ChildInfo::FileType,
213 it->file().c_str(), NULL);
214 std::string hash = hashForChild(info);
216 ChildInfoList::iterator c = findIndexChild(it->file());
217 if (c == children.end()) {
218 orphans.push_back(it->file());
219 } else if (c->hash != hash) {
220 // file exists, but hash mismatch, schedule update
222 SG_LOG(SG_TERRASYNC, SG_INFO, "file exists but hash is wrong for:" << c->name);
225 toBeUpdated.push_back(c->name);
227 // file exists and hash is valid. If it's a directory,
228 // perform a recursive check.
229 if (c->type == ChildInfo::DirectoryType) {
230 SGPath p(relativePath());
232 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
233 childDir->updateChildrenBasedOnHash();
235 SG_LOG(SG_TERRASYNC, SG_INFO, "existing file is ok:" << c->name);
239 // remove existing file system children from the index list,
240 // so we can detect new children
241 string_list::iterator it = std::find(indexNames.begin(), indexNames.end(), c->name);
242 if (it != indexNames.end()) {
243 indexNames.erase(it);
245 } // of real children iteration
247 // all remaining names in indexChilden are new children
248 toBeUpdated.insert(toBeUpdated.end(), indexNames.begin(), indexNames.end());
250 removeOrphans(orphans);
251 scheduleUpdates(toBeUpdated);
// Process every on-disk name that the directory index does not list.
// NOTE(review): the loop body is elided in this view; presumably each name
// is handed to removeChild() — confirm.
254 void removeOrphans(const string_list& orphans)
256 string_list::const_iterator it;
257 for (it = orphans.begin(); it != orphans.end(); ++it) {
262 string_list indexChildren() const
265 r.reserve(children.size());
266 ChildInfoList::const_iterator it;
267 for (it=children.begin(); it != children.end(); ++it) {
268 r.push_back(it->name);
// Start a download for every name in 'names'. Each name is looked up in the
// directory index; a name the index does not contain is reported with a
// warning. Files are requested through HTTPRepoPrivate::updateFile(),
// directories through getOrCreateDirectory() + updateDir().
// NOTE(review): the handling after the "unknown child" warning, the branch
// separating files from directories and the line that extends 'p' with the
// child name are elided in this view — confirm.
273 void scheduleUpdates(const string_list& names)
275 string_list::const_iterator it;
276 for (it = names.begin(); it != names.end(); ++it) {
277 ChildInfoList::iterator cit = findIndexChild(*it);
278 if (cit == children.end()) {
279 SG_LOG(SG_TERRASYNC, SG_WARN, "scheduleUpdate, unknown child:" << *it);
283 if (cit->type == ChildInfo::FileType) {
284 _repository->updateFile(this, *it);
286 SGPath p(relativePath());
288 HTTPDirectory* childDir = _repository->getOrCreateDirectory(p.str());
289 _repository->updateDir(childDir);
294 SGPath absolutePath() const
296 SGPath r(_repository->basePath);
297 r.append(_relativePath.str());
301 SGPath relativePath() const
303 return _relativePath;
// Called by FileGetRequest::onDone after a successful download: records
// 'hash' for the file in the repository hash cache and logs the update.
// NOTE(review): a line is elided after the SGPath construction; presumably
// it appends 'file' to the path — confirm.
306 void didUpdateFile(const std::string& file, const std::string& hash)
308 SGPath fpath(_relativePath);
310 _repository->updatedFileContents(fpath, hash);
311 SG_LOG(SG_TERRASYNC, SG_INFO, "did update:" << fpath);
// Called by FileGetRequest::onDone when the download did not succeed; the
// visible code only logs a warning.
// NOTE(review): a line is elided after the SGPath construction; presumably
// it appends 'file' to the path — confirm.
314 void didFailToUpdateFile(const std::string& file)
316 SGPath fpath(_relativePath);
318 SG_LOG(SG_TERRASYNC, SG_WARN, "failed to update:" << fpath);
324 ChildWithName(const std::string& n) : name(n) {}
327 bool operator()(const ChildInfo& info) const
328 { return info.name == name; }
331 ChildInfoList::iterator findIndexChild(const std::string& name)
333 return std::find_if(children.begin(), children.end(), ChildWithName(name));
// Read the ".dirindex" file of this directory and append one ChildInfo per
// entry line to 'children'.
// Lines are split on ':'. The first token selects the line kind: "version"
// and "path" lines are recognised separately, everything else is read as
// <type>:<name>:<hash>:<size>, where a type starting with 'f' is a file and
// 'd' a directory.
// Throws sg_io_exception when the file cannot be opened or a line is
// malformed.
336 void parseDirIndex(ChildInfoList& children)
338 SGPath p(absolutePath());
339 p.append(".dirindex");
340 std::ifstream indexStream( p.str().c_str(), std::ios::in );
342 if ( !indexStream.is_open() ) {
343 throw sg_io_exception("cannot open dirIndex file", p);
346 char lineBuffer[512];
// NOTE(review): looping on eof() is fragile — a line longer than the buffer
// makes getline() set failbit without reaching end-of-file, after which
// eof() never becomes true. Testing the stream state returned by getline()
// would terminate in that case.
349 while (!indexStream.eof() ) {
350 indexStream.getline(lineBuffer, 512);
352 char* typeData = ::strtok_r(lineBuffer, ":", &lastToken);
354 continue; // skip blank line
359 throw sg_io_exception("Malformed dir index file", p);
362 if (!strcmp(typeData, "version")) {
364 } else if (!strcmp(typeData, "path")) {
// strtok_r() returns NULL for each field missing from the line
368 char* nameData = ::strtok_r(NULL, ":", &lastToken);
369 char* hashData = ::strtok_r(NULL, ":", &lastToken);
370 char* sizeData = ::strtok_r(NULL, ":", &lastToken);
372 if (typeData[0] == 'f') {
373 children.push_back(ChildInfo(ChildInfo::FileType, nameData, hashData));
374 } else if (typeData[0] == 'd') {
375 children.push_back(ChildInfo(ChildInfo::DirectoryType, nameData, hashData));
377 throw sg_io_exception("Malformed line code in dir index file", p);
// NOTE(review): the line guarding this call is elided; sizeData is NULL
// when the size field is absent — confirm the call is conditional
381 children.back().setSize(sizeData);
// Remove the child 'name' of this directory. Visible steps: a directory is
// removed through HTTPRepoPrivate::deleteDirectory(); updatedFileContents()
// is called with an empty hash, which drops the hash cache entry without
// adding a new one; a warning is logged when removal failed.
// NOTE(review): the lines that extend 'p' and 'fpath' with 'name', the
// file/directory distinction and the plain-file removal are elided in this
// view — confirm.
386 void removeChild(const std::string& name)
388 SGPath p(absolutePath());
392 SGPath fpath(_relativePath);
396 ok = _repository->deleteDirectory(fpath.str());
398 // remove the hash cache entry
399 _repository->updatedFileContents(fpath, std::string());
404 SG_LOG(SG_TERRASYNC, SG_WARN, "removal failed for:" << p);
408 std::string hashForChild(const ChildInfo& child) const
410 SGPath p(absolutePath());
411 p.append(child.name);
412 if (child.type == ChildInfo::DirectoryType) {
413 p.append(".dirindex");
415 return _repository->hashForPath(p);
418 HTTPRepoPrivate* _repository;
419 SGPath _relativePath; // in URL and file-system space
// Create the private implementation object and the root directory object
// (empty relative path).
// NOTE(review): the statements that store 'base' and 'cl' are elided in
// this view.
// NOTE(review): rootDir is a raw pointer initialised with new; where it is
// released is not visible here — confirm it is deleted.
424 HTTPRepository::HTTPRepository(const SGPath& base, HTTP::Client *cl) :
425 _d(new HTTPRepoPrivate(this))
429 _d->rootDir = new HTTPDirectory(_d.get(), "");
432 HTTPRepository::~HTTPRepository()
436 void HTTPRepository::setBaseUrl(const std::string &url)
441 std::string HTTPRepository::baseUrl() const
446 HTTP::Client* HTTPRepository::http() const
451 SGPath HTTPRepository::fsBase() const
// Start a synchronisation pass: reset the status to REPO_NO_ERROR, mark
// the repository as updating and request the index of the root directory.
// NOTE(review): the body of the isUpdating check is elided; presumably it
// returns early so a running update is not restarted — confirm.
456 void HTTPRepository::update()
458 if (_d->isUpdating) {
462 _d->status = REPO_NO_ERROR;
463 _d->isUpdating = true;
464 _d->updateDir(_d->rootDir);
// Report whether a synchronisation pass is in progress (the isUpdating
// flag).
// NOTE(review): the body of the status check is elided; presumably an error
// status makes this report false — confirm.
467 bool HTTPRepository::isDoingSync() const
469 if (_d->status != REPO_NO_ERROR) {
473 return _d->isUpdating;
476 AbstractRepository::ResultCode
477 HTTPRepository::failure() const
// HTTP request that downloads one file of an HTTPDirectory. The target file
// is created under the directory's absolute path and a SHA-1 hash of the
// body is computed as data arrives.
// NOTE(review): many lines of this class are elided in this view
// (initialiser list, the write to 'fd', closing of 'fd', access
// specifiers); the comments below cover only the visible statements.
482 class FileGetRequest : public HTTP::Request
485 FileGetRequest(HTTPDirectory* d, const std::string& file) :
486 HTTP::Request(makeUrl(d, file)),
491 SG_LOG(SG_TERRASYNC, SG_INFO, "will GET file " << url());
// Body data callback: creates/truncates the destination file, starts the
// hash, then feeds each chunk to it.
496 virtual void gotBodyData(const char* s, int n)
499 SGPath p(pathInRepo());
// NOTE(review): this mode requests write permission for group and others
// as well (subject to the process umask) — confirm that is intended
503 mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
505 fd = ::open(p.c_str(), O_CREAT | O_TRUNC | O_RDWR, mode);
507 SG_LOG(SG_TERRASYNC, SG_WARN, "unable to create file " << p);
510 sha1_init(&hashContext);
514 sha1_write(&hashContext, s, n);
// Completion callback: on a 200 response the directory is told the file
// was updated (with its hex hash), otherwise that it failed; in both cases
// the repository is notified that this request has finished.
518 virtual void onDone()
521 if (responseCode() == 200) {
// NOTE(review): computeHashForPath() builds a std::string of HASH_LENGTH
// bytes from the digest before calling encodeHex(). If encodeHex() takes a
// std::string, the implicit conversion from char* here stops at the first
// zero byte of the raw digest and yields a truncated hash. Suggested form:
//   std::string raw((char*) sha1_result(&hashContext), HASH_LENGTH);
//   std::string hash = strutils::encodeHex(raw);
522 std::string hash = strutils::encodeHex((char*) sha1_result(&hashContext));
523 directory->didUpdateFile(fileName, hash);
525 SG_LOG(SG_TERRASYNC, SG_DEBUG, "got file " << fileName << " in " << directory->absolutePath());
527 directory->didFailToUpdateFile(fileName);
530 directory->repository()->finishedRequest(this);
// URL of the file: directory URL + "/" + file name
533 static std::string makeUrl(HTTPDirectory* d, const std::string& file)
535 return d->url() + "/" + file;
// Destination path of the download, based on the directory's absolute path
538 SGPath pathInRepo() const
540 SGPath p(directory->absolutePath());
545 HTTPDirectory* directory;
546 std::string fileName; // if empty, we're getting the directory itself
547 simgear::sha1nfo hashContext;
// HTTP request that downloads the ".dirindex" file of one HTTPDirectory.
// The body is kept in memory and hashed (SHA-1) as it arrives; it is only
// written to disk when its hash differs from the locally known one.
// NOTE(review): several lines of this class are elided in this view
// (initialiser list, bodies of small accessors, some statements in
// onDone()); the comments below cover only the visible statements.
551 class DirGetRequest : public HTTP::Request
554 DirGetRequest(HTTPDirectory* d) :
555 HTTP::Request(makeUrl(d)),
559 sha1_init(&hashContext);
560 SG_LOG(SG_TERRASYNC, SG_INFO, "will GET dir " << url());
569 bool isRootDir() const
// Body data callback: append the chunk to 'body' and to the running hash
575 virtual void gotBodyData(const char* s, int n)
577 body += std::string(s, n);
578 sha1_write(&hashContext, s, n);
// Completion callback. On a 200 response the downloaded index is compared
// by hash with the local one; when they differ the directory is created if
// needed, the new index is written and the directory re-parses it. The
// directory then reconciles its children. Any other response is reported
// through failedToUpdate(). Finally the repository is told that the request
// has finished.
581 virtual void onDone()
583 if (responseCode() == 200) {
// NOTE(review): computeHashForPath() builds a std::string of HASH_LENGTH
// bytes from the digest before calling encodeHex(). If encodeHex() takes a
// std::string, the implicit conversion from char* here stops at the first
// zero byte of the raw digest and yields a truncated hash. Suggested form:
//   std::string raw((char*) sha1_result(&hashContext), HASH_LENGTH);
//   std::string hash = strutils::encodeHex(raw);
584 std::string hash = strutils::encodeHex((char*) sha1_result(&hashContext));
585 std::string curHash = directory->repository()->hashForPath(path());
586 if (hash != curHash) {
588 simgear::Dir d(directory->absolutePath());
590 if (!d.create(0700)) {
591 throw sg_io_exception("Unable to create directory", d.path());
595 // dir index data has changed, so write to disk and update
596 // the hash accordingly
597 std::ofstream of(pathInRepo().str().c_str(), std::ios::trunc | std::ios::out);
// NOTE(review): assert() is compiled out when NDEBUG is defined, so a
// failed open would then go unnoticed and the write below would silently
// do nothing; an explicit check would be more robust
598 assert(of.is_open());
599 of.write(body.data(), body.size());
601 directory->dirIndexUpdated(hash);
603 SG_LOG(SG_TERRASYNC, SG_DEBUG, "updated dir index " << directory->absolutePath());
607 // either way we've confirmed the index is valid so update
609 directory->updateChildrenBasedOnHash();
611 directory->failedToUpdate();
614 directory->repository()->finishedRequest(this);
// URL of the index: directory URL + "/.dirindex"
617 static std::string makeUrl(HTTPDirectory* d)
619 return d->url() + "/.dirindex";
// On-disk location of the index file inside the directory
622 SGPath pathInRepo() const
624 SGPath p(directory->absolutePath());
625 p.append(".dirindex");
629 HTTPDirectory* directory;
630 simgear::sha1nfo hashContext;
632 bool _isRootDir; ///< is this the repository root?
636 HTTP::Request_ptr HTTPRepoPrivate::updateFile(HTTPDirectory* dir, const std::string& name)
638 HTTP::Request_ptr r(new FileGetRequest(dir, name));
639 http->makeRequest(r);
640 requests.push_back(r);
644 HTTP::Request_ptr HTTPRepoPrivate::updateDir(HTTPDirectory* dir)
646 HTTP::Request_ptr r(new DirGetRequest(dir));
647 http->makeRequest(r);
648 requests.push_back(r);
// Predicate for std::find_if over the hash cache: matches the entry whose
// filePath equals the path given at construction (exact string comparison).
653 class HashEntryWithPath
656 HashEntryWithPath(const std::string& p) : path(p) {}
657 bool operator()(const HTTPRepoPrivate::HashCacheEntry& entry) const
658 { return entry.filePath == path; }
// Hash of the file at 'p', using the in-memory hash cache where possible.
// A cache entry counts as valid only while the file's size and
// modification time still match the cached values; otherwise the hash is
// recomputed from the file and stored through updatedFileContents().
// NOTE(review): the return statements and the removal of a stale entry are
// elided in this view — confirm.
663 std::string HTTPRepoPrivate::hashForPath(const SGPath& p)
665 HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
666 if (it != hashes.end()) {
667 // ensure data on disk hasn't changed.
668 // we could also use the file type here if we were paranoid
669 if ((p.sizeInBytes() == it->lengthBytes) && (p.modTime() == it->modTime)) {
673 // entry in the cache, but it's stale so remove and fall through
677 std::string hash = computeHashForPath(p);
678 updatedFileContents(p, hash);
682 std::string HTTPRepoPrivate::computeHashForPath(const SGPath& p)
685 return std::string();
688 char* buf = static_cast<char*>(malloc(1024 * 1024));
690 int fd = ::open(p.c_str(), O_RDONLY);
692 throw sg_io_exception("Couldn't open file for compute hash", p);
694 while ((readLen = ::read(fd, buf, 1024 * 1024)) > 0) {
695 sha1_write(&info, buf, readLen);
700 std::string hashBytes((char*) sha1_result(&info), HASH_LENGTH);
701 return strutils::encodeHex(hashBytes);
// Replace the hash cache entry for 'p'. Any existing entry for the path is
// looked up first; when 'newHash' is empty no new entry is added, otherwise
// a fresh entry is appended holding the new hash together with the file's
// current modification time and size.
// NOTE(review): the statement that erases the existing entry and the
// construction of 'p2' are elided in this view — confirm.
704 void HTTPRepoPrivate::updatedFileContents(const SGPath& p, const std::string& newHash)
706 // remove the existing entry
707 HashCache::iterator it = std::find_if(hashes.begin(), hashes.end(), HashEntryWithPath(p.str()));
708 if (it != hashes.end()) {
712 if (newHash.empty()) {
713 return; // we're done
716 // use a cloned SGPath and reset its caching to force one stat() call
718 p2.set_cached(false);
721 HashCacheEntry entry;
722 entry.filePath = p.str();
723 entry.hashHex = newHash;
724 entry.modTime = p2.modTime();
725 entry.lengthBytes = p2.sizeInBytes();
726 hashes.push_back(entry);
731 void HTTPRepoPrivate::writeHashCache()
733 SGPath cachePath = basePath;
734 cachePath.append(".hashes");
736 std::ofstream stream(cachePath.str().c_str(),std::ios::out | std::ios::trunc);
737 HashCache::const_iterator it;
738 for (it = hashes.begin(); it != hashes.end(); ++it) {
739 stream << it->filePath << ":" << it->modTime << ":"
740 << it->lengthBytes << ":" << it->hashHex << "\n";
// Load "<basePath>/.hashes" into the in-memory hash cache. Each line is
// split on ':' into path, modification time, size and hex hash; one entry
// is appended to 'hashes' per complete line.
// NOTE(review): the handling of a missing cache file, the buffer
// declaration and the handling of incomplete lines are elided in this
// view — confirm.
// NOTE(review): ':' is also the field separator, so a file path containing
// ':' (for example a Windows drive prefix) would not parse back correctly —
// verify against the platforms supported.
745 void HTTPRepoPrivate::parseHashCache()
748 SGPath cachePath = basePath;
749 cachePath.append(".hashes");
750 if (!cachePath.exists()) {
754 std::ifstream stream(cachePath.str().c_str(), std::ios::in);
// NOTE(review): looping on eof() is fragile — a line longer than the buffer
// makes getline() set failbit without reaching end-of-file, after which
// eof() never becomes true
758 while (!stream.eof()) {
759 stream.getline(buf, 2048);
761 char* nameData = ::strtok_r(buf, ":", &lastToken);
762 char* timeData = ::strtok_r(NULL, ":", &lastToken);
763 char* sizeData = ::strtok_r(NULL, ":", &lastToken);
764 char* hashData = ::strtok_r(NULL, ":", &lastToken);
// strtok_r() returns NULL for each field missing from the line
765 if (!nameData || !timeData || !sizeData || !hashData) {
769 HashCacheEntry entry;
770 entry.filePath = nameData;
771 entry.hashHex = hashData;
772 entry.modTime = strtol(timeData, NULL, 10);
773 entry.lengthBytes = strtol(sizeData, NULL, 10);
774 hashes.push_back(entry);
// Predicate for std::find_if over the directory list: matches the
// HTTPDirectory whose relative path string equals the path given at
// construction.
778 class DirectoryWithPath
781 DirectoryWithPath(const std::string& p) : path(p) {}
782 bool operator()(const HTTPDirectory* entry) const
783 { return entry->relativePath().str() == path; }
// Look up the HTTPDirectory object for the relative path 'path' in
// 'directories'; when none is found a new one is created with new and
// appended to the list.
// NOTE(review): both return statements are elided in this view; presumably
// the found or newly created object is returned — confirm.
788 HTTPDirectory* HTTPRepoPrivate::getOrCreateDirectory(const std::string& path)
790 DirectoryWithPath p(path);
791 DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
792 if (it != directories.end()) {
796 HTTPDirectory* d = new HTTPDirectory(this, path);
797 directories.push_back(d);
// Remove the directory with relative path 'path': its object is taken out
// of 'directories', the on-disk directory is removed recursively and the
// hash cache entry for 'path' is dropped (empty hash).
// NOTE(review): the return statements and whatever happens to the removed
// HTTPDirectory object are elided in this view — confirm the object is
// deleted and what is returned when 'path' is unknown.
801 bool HTTPRepoPrivate::deleteDirectory(const std::string& path)
803 DirectoryWithPath p(path);
804 DirectoryVector::iterator it = std::find_if(directories.begin(), directories.end(), p);
805 if (it != directories.end()) {
806 HTTPDirectory* d = *it;
807 directories.erase(it);
808 Dir dir(d->absolutePath());
809 bool result = dir.remove(true);
812 // update the hash cache too
813 updatedFileContents(path, std::string());
// Called by a request from its onDone() handler. The request must be
// present in 'requests'; otherwise an sg_exception is thrown.
// NOTE(review): the removal of the request from the list and the body of
// the "no requests left" check are elided in this view; presumably the
// latter ends the update — confirm.
821 void HTTPRepoPrivate::finishedRequest(const HTTP::Request_ptr& req)
823 RequestVector::iterator it = std::find(requests.begin(), requests.end(), req);
824 if (it == requests.end()) {
825 throw sg_exception("lost request somehow");
828 if (requests.empty()) {
833 void HTTPRepoPrivate::failedToGetRootIndex()
835 SG_LOG(SG_TERRASYNC, SG_WARN, "Failed to get root of repo:" << baseUrl);
836 status = AbstractRepository::REPO_ERROR_NOT_FOUND;
840 } // of namespace simgear