From 115531e944f51c3295f69f3ed361bf421515cc58 Mon Sep 17 00:00:00 2001 From: James Turner Date: Thu, 4 Aug 2011 18:03:56 +0100 Subject: [PATCH] Further HTTP improvements, correct proxy support, spec compliance, support for chunked encoding. --- simgear/io/HTTPClient.cxx | 193 ++++++++++++++++++++++++++++++------- simgear/io/HTTPClient.hxx | 3 +- simgear/io/HTTPRequest.cxx | 39 ++++++-- simgear/io/HTTPRequest.hxx | 22 ++++- simgear/io/httpget.cxx | 12 ++- simgear/io/sg_netChat.cxx | 4 +- simgear/io/test_HTTP.cxx | 83 ++++++++++++++-- simgear/misc/strutils.cxx | 15 ++- simgear/misc/strutils.hxx | 5 +- 9 files changed, 317 insertions(+), 59 deletions(-) diff --git a/simgear/io/HTTPClient.cxx b/simgear/io/HTTPClient.cxx index 8c023ab3..3ea5f02a 100644 --- a/simgear/io/HTTPClient.cxx +++ b/simgear/io/HTTPClient.cxx @@ -17,7 +17,7 @@ #include "version.h" #else # if !defined(SIMGEAR_VERSION) -# define SIMGEAR_VERSION "development " __DATE__ +# define SIMGEAR_VERSION "simgear-development" # endif #endif @@ -43,10 +43,30 @@ public: setTerminator("\r\n"); } - void connectToHost(const string& host, short port) + bool connectToHost(const string& host, short port) { - open(); - connect(host.c_str(), port); + if (!open()) { + SG_LOG(SG_ALL, SG_WARN, "HTTP::Connection: connectToHost: open() failed"); + return false; + } + + if (connect(host.c_str(), port) != 0) { + return false; + } + + return true; + } + + // socket-level errors + virtual void handleError(int error) + { + NetChat::handleError(error); + if (activeRequest) { + activeRequest->setFailure(error, "socket error"); + activeRequest = NULL; + } + + state = STATE_SOCKET_ERROR; } void queueRequest(const Request_ptr& r) @@ -63,16 +83,17 @@ public: activeRequest = r; state = STATE_IDLE; bodyTransferSize = 0; + chunkedTransfer = false; stringstream headerData; string path = r->path(); if (!client->proxyHost().empty()) { - path = "http://" + r->hostAndPort() + path; + path = r->url(); } - headerData << r->method() << " " << 
path << " HTTP/1.1 " << client->userAgent() << "\r\n"; + headerData << r->method() << " " << path << " HTTP/1.1\r\n"; headerData << "Host: " << r->hostAndPort() << "\r\n"; - + headerData << "User-Agent:" << client->userAgent() << "\r\n"; if (!client->proxyAuth().empty()) { headerData << "Proxy-Authorization: " << client->proxyAuth() << "\r\n"; } @@ -90,8 +111,8 @@ public: virtual void collectIncomingData(const char* s, int n) { - if (state == STATE_GETTING_BODY) { - activeRequest->gotBodyData(s, n); + if ((state == STATE_GETTING_BODY) || (state == STATE_GETTING_CHUNKED_BYTES)) { + activeRequest->processBodyBytes(s, n); } else { buffer += string(s, n); } @@ -113,17 +134,23 @@ public: case STATE_GETTING_BODY: responseComplete(); - state = STATE_IDLE; - setTerminator("\r\n"); + break; + + case STATE_GETTING_CHUNKED: + processChunkHeader(); + break; - if (!queuedRequests.empty()) { - Request_ptr next = queuedRequests.front(); - queuedRequests.pop_front(); - startRequest(next); - } else { - idleTime.stamp(); - } + case STATE_GETTING_CHUNKED_BYTES: + setTerminator("\r\n"); + state = STATE_GETTING_CHUNKED; + break; + case STATE_GETTING_TRAILER: + processTrailer(); + buffer.clear(); + break; + + default: break; } } @@ -136,6 +163,11 @@ public: return idleTime.elapsedMSec() > 1000 * 10; // ten seconds } + + bool hasError() const + { + return (state == STATE_SOCKET_ERROR); + } private: void processHeader() { @@ -143,12 +175,13 @@ private: if (h.empty()) { // blank line terminates headers headersComplete(); - if (bodyTransferSize > 0) { + if (chunkedTransfer) { + state = STATE_GETTING_CHUNKED; + } else if (bodyTransferSize > 0) { state = STATE_GETTING_BODY; setByteCount(bodyTransferSize); } else { responseComplete(); - state = STATE_IDLE; // no response body, we're done } return; } @@ -163,15 +196,73 @@ private: string lkey = boost::to_lower_copy(key); string value = strutils::strip(buffer.substr(colonPos + 1)); - if (lkey == "content-length" && (bodyTransferSize <= 0)) { - 
bodyTransferSize = strutils::to_int(value); - } else if (lkey == "transfer-length") { - bodyTransferSize = strutils::to_int(value); + // only consider these if getting headers (as opposed to trailers + // of a chunked transfer) + if (state == STATE_GETTING_HEADERS) { + if (lkey == "content-length") { + int sz = strutils::to_int(value); + if (bodyTransferSize <= 0) { + bodyTransferSize = sz; + } + activeRequest->setResponseLength(sz); + } else if (lkey == "transfer-length") { + bodyTransferSize = strutils::to_int(value); + } else if (lkey == "transfer-encoding") { + processTransferEncoding(value); + } } - + activeRequest->responseHeader(lkey, value); } + void processTransferEncoding(const string& te) + { + if (te == "chunked") { + chunkedTransfer = true; + } else { + SG_LOG(SG_IO, SG_WARN, "unsupported transfer encoding:" << te); + // failure + } + } + + void processChunkHeader() + { + if (buffer.empty()) { + // blank line after chunk data + return; + } + + int chunkSize = 0; + int semiPos = buffer.find(';'); + if (semiPos >= 0) { + // extensions ignored for the moment + chunkSize = strutils::to_int(buffer.substr(0, semiPos), 16); + } else { + chunkSize = strutils::to_int(buffer, 16); + } + + buffer.clear(); + if (chunkSize == 0) { // trailer start + state = STATE_GETTING_TRAILER; + return; + } + + state = STATE_GETTING_CHUNKED_BYTES; + setByteCount(chunkSize); + } + + void processTrailer() + { + if (buffer.empty()) { + // end of trailers + responseComplete(); + return; + } + + // process as a normal header + processHeader(); + } + void headersComplete() { activeRequest->responseHeadersComplete(); @@ -182,12 +273,27 @@ private: activeRequest->responseComplete(); client->requestFinished(this); activeRequest = NULL; + + state = STATE_IDLE; + setTerminator("\r\n"); + + if (!queuedRequests.empty()) { + Request_ptr next = queuedRequests.front(); + queuedRequests.pop_front(); + startRequest(next); + } else { + idleTime.stamp(); + } } enum ConnectionState { STATE_IDLE = 0, 
STATE_GETTING_HEADERS, - STATE_GETTING_BODY + STATE_GETTING_BODY, + STATE_GETTING_CHUNKED, + STATE_GETTING_CHUNKED_BYTES, + STATE_GETTING_TRAILER, + STATE_SOCKET_ERROR }; Client* client; @@ -196,6 +302,7 @@ private: std::string buffer; int bodyTransferSize; SGTimeStamp idleTime; + bool chunkedTransfer; std::list queuedRequests; }; @@ -207,9 +314,11 @@ Client::Client() void Client::update() { + NetChannel::poll(); + ConnectionDict::iterator it = _connections.begin(); for (; it != _connections.end(); ) { - if (it->second->hasIdleTimeout()) { + if (it->second->hasIdleTimeout() || it->second->hasError()) { // connection has been idle for a while, clean it up ConnectionDict::iterator del = it++; delete del->second; @@ -222,18 +331,35 @@ void Client::update() void Client::makeRequest(const Request_ptr& r) { - string host = r->hostAndPort(); + string host = r->host(); + int port = r->port(); if (!_proxy.empty()) { host = _proxy; + port = _proxyPort; } - if (_connections.find(host) == _connections.end()) { + stringstream ss; + ss << host << "-" << port; + string connectionId = ss.str(); + + if (_connections.find(connectionId) == _connections.end()) { Connection* con = new Connection(this); - con->connectToHost(r->host(), r->port()); - _connections[host] = con; + bool ok = con->connectToHost(host, port); + if (!ok) { + // since NetChannel connect is non-blocking, this failure + // path is unlikely, but still checked for. 
+ SG_LOG(SG_IO, SG_WARN, "unable to connect to host:" + << host << " (port:" << port << ")"); + delete con; + + r->setFailure(-1, "unable to connect to host"); + return; + } + + _connections[connectionId] = con; } - _connections[host]->queueRequest(r); + _connections[connectionId]->queueRequest(r); } void Client::requestFinished(Connection* con) @@ -246,9 +372,10 @@ void Client::setUserAgent(const string& ua) _userAgent = ua; } -void Client::setProxy(const string& proxy, const string& auth) +void Client::setProxy(const string& proxy, int port, const string& auth) { _proxy = proxy; + _proxyPort = port; _proxyAuth = auth; } diff --git a/simgear/io/HTTPClient.hxx b/simgear/io/HTTPClient.hxx index 7e7a69b7..67065935 100644 --- a/simgear/io/HTTPClient.hxx +++ b/simgear/io/HTTPClient.hxx @@ -23,7 +23,7 @@ public: void makeRequest(const Request_ptr& r); void setUserAgent(const std::string& ua); - void setProxy(const std::string& proxy, const std::string& auth = ""); + void setProxy(const std::string& proxy, int port, const std::string& auth = ""); const std::string& userAgent() const { return _userAgent; } @@ -40,6 +40,7 @@ private: std::string _userAgent; std::string _proxy; + int _proxyPort; std::string _proxyAuth; // connections by host diff --git a/simgear/io/HTTPRequest.cxx b/simgear/io/HTTPRequest.cxx index 68d69d3c..84a19c99 100644 --- a/simgear/io/HTTPRequest.cxx +++ b/simgear/io/HTTPRequest.cxx @@ -17,7 +17,10 @@ extern const int DEFAULT_HTTP_PORT; Request::Request(const string& url, const string method) : _method(method), - _url(url) + _url(url), + _responseStatus(0), + _responseLength(0), + _receivedBodyBytes(0) { } @@ -68,6 +71,12 @@ void Request::responseHeadersComplete() // no op } +void Request::processBodyBytes(const char* s, int n) +{ + _receivedBodyBytes += n; + gotBodyData(s, n); +} + void Request::gotBodyData(const char* s, int n) { @@ -148,14 +157,32 @@ string Request::hostAndPort() const return u.substr(schemeEnd + 3, hostEnd - (schemeEnd + 3)); } 
-unsigned int Request::contentLength() const +void Request::setResponseLength(unsigned int l) +{ + _responseLength = l; +} + +unsigned int Request::responseLength() const { - HeaderDict::const_iterator it = _responseHeaders.find("content-length"); - if (it == _responseHeaders.end()) { - return 0; +// if the server didn't supply a content length, use the number +// of bytes we actually received (so far) + if ((_responseLength == 0) && (_receivedBodyBytes > 0)) { + return _receivedBodyBytes; } - return (unsigned int) strutils::to_int(it->second); + return _responseLength; +} + +void Request::setFailure(int code, const std::string& reason) +{ + _responseStatus = code; + _responseReason = reason; + failed(); +} + +void Request::failed() +{ + // no-op in base class } } // of namespace HTTP diff --git a/simgear/io/HTTPRequest.hxx b/simgear/io/HTTPRequest.hxx index 8c0fcbac..1c7dc083 100644 --- a/simgear/io/HTTPRequest.hxx +++ b/simgear/io/HTTPRequest.hxx @@ -40,24 +40,38 @@ public: virtual std::string responseReason() const { return _responseReason; } - virtual unsigned int contentLength() const; -protected: - friend class Connection; + void setResponseLength(unsigned int l); + virtual unsigned int responseLength() const; + /** + * running total of body bytes received so far. Can be used + * to generate a completion percentage, if the response length is + * known. 
+ */ + unsigned int responseBytesReceived() const + { return _receivedBodyBytes; } +protected: Request(const std::string& url, const std::string method = "GET"); virtual void responseStart(const std::string& r); virtual void responseHeader(const std::string& key, const std::string& value); virtual void responseHeadersComplete(); virtual void responseComplete(); - + virtual void failed(); virtual void gotBodyData(const char* s, int n); private: + friend class Client; + friend class Connection; + + void processBodyBytes(const char* s, int n); + void setFailure(int code, const std::string& reason); std::string _method; std::string _url; int _responseStatus; std::string _responseReason; + unsigned int _responseLength; + unsigned int _receivedBodyBytes; typedef std::map HeaderDict; HeaderDict _responseHeaders; diff --git a/simgear/io/httpget.cxx b/simgear/io/httpget.cxx index 513bb4b2..c52117a9 100644 --- a/simgear/io/httpget.cxx +++ b/simgear/io/httpget.cxx @@ -10,6 +10,7 @@ #include #include #include +#include using namespace simgear; using std::cout; @@ -119,7 +120,16 @@ int main(int argc, char* argv[]) } // of arguments iteration if (!proxy.empty()) { - cl.setProxy(proxy, proxyAuth); + int colonPos = proxy.find(':'); + string proxyHost = proxy; + int proxyPort = 8800; + if (colonPos >= 0) { + proxyHost = proxy.substr(0, colonPos); + proxyPort = strutils::to_int(proxy.substr(colonPos + 1)); + cout << proxyHost << " " << proxyPort << endl; + } + + cl.setProxy(proxyHost, proxyPort, proxyAuth); } if (!outFile) { diff --git a/simgear/io/sg_netChat.cxx b/simgear/io/sg_netChat.cxx index f007e285..72064013 100644 --- a/simgear/io/sg_netChat.cxx +++ b/simgear/io/sg_netChat.cxx @@ -115,8 +115,8 @@ NetChat::handleBufferRead (NetBuffer& in_buffer) collectIncomingData (in_buffer.getData(),in_buffer.getLength()); in_buffer.remove (); } - - return; + + continue; } int terminator_len = strlen(terminator); diff --git a/simgear/io/test_HTTP.cxx b/simgear/io/test_HTTP.cxx index 
8aafef2d..d0c77121 100644 --- a/simgear/io/test_HTTP.cxx +++ b/simgear/io/test_HTTP.cxx @@ -43,6 +43,7 @@ class TestRequest : public HTTP::Request { public: bool complete; + bool failed; string bodyData; TestRequest(const std::string& url) : @@ -52,6 +53,7 @@ public: } + std::map headers; protected: virtual void responseHeadersComplete() { @@ -62,10 +64,20 @@ protected: complete = true; } + virtual void failure() + { + failed = true; + } + virtual void gotBodyData(const char* s, int n) { bodyData += string(s, n); } + + virtual void responseHeader(const string& header, const string& value) + { + headers[header] = value; + } }; class TestServerChannel : public NetChat @@ -143,6 +155,21 @@ public: push(d.str().c_str()); } else if (path == "/test2") { sendBody2(); + } else if (path == "/testchunked") { + stringstream d; + d << "HTTP1.1 " << 200 << " " << reasonForCode(200) << "\r\n"; + d << "Transfer-Encoding:chunked\r\n"; + d << "\r\n"; + d << "8\r\n"; // first chunk + d << "ABCDEFGH\r\n"; + d << "6\r\n"; // second chunk + d << "ABCDEF\r\n"; + d << "10\r\n"; // third chunk + d << "ABCDSTUVABCDSTUV\r\n"; + d << "0\r\n"; // start of trailer + d << "X-Foobar: wibble\r\n"; // trailer data + d << "\r\n"; + push(d.str().c_str()); } else if (path == "http://www.google.com/test2") { // proxy test if (requestHeaders["host"] != "www.google.com") { @@ -227,7 +254,7 @@ public: { simgear::IPAddress addr ; int handle = accept ( &addr ) ; - + cout << "did accept from " << addr.getHost() << ":" << addr.getPort() << endl; TestServerChannel* chan = new TestServerChannel(); chan->setHandle(handle); } @@ -246,6 +273,19 @@ void waitForComplete(TestRequest* tr) cerr << "timed out" << endl; } +void waitForFailed(TestRequest* tr) +{ + SGTimeStamp start(SGTimeStamp::now()); + while (start.elapsedMSec() < 1000) { + NetChannel::poll(10); + if (tr->failed) { + return; + } + } + + cerr << "timed out waiting for failure" << endl; +} + int main(int argc, char* argv[]) { TestServer s; @@ -276,7 
+316,8 @@ int main(int argc, char* argv[]) waitForComplete(tr); COMPARE(tr->responseCode(), 200); COMPARE(tr->responseReason(), string("OK")); - COMPARE(tr->contentLength(), strlen(BODY1)); + COMPARE(tr->responseLength(), strlen(BODY1)); + COMPARE(tr->responseBytesReceived(), strlen(BODY1)); COMPARE(tr->bodyData, string(BODY1)); } @@ -291,10 +332,24 @@ int main(int argc, char* argv[]) cl.makeRequest(tr); waitForComplete(tr); COMPARE(tr->responseCode(), 200); - COMPARE(tr->contentLength(), body2Size); + COMPARE(tr->responseBytesReceived(), body2Size); COMPARE(tr->bodyData, string(body2, body2Size)); } + { + TestRequest* tr = new TestRequest("http://localhost:2000/testchunked"); + HTTP::Request_ptr own(tr); + cl.makeRequest(tr); + + waitForComplete(tr); + COMPARE(tr->responseCode(), 200); + COMPARE(tr->responseReason(), string("OK")); + COMPARE(tr->responseBytesReceived(), 30); + COMPARE(tr->bodyData, "ABCDEFGHABCDEFABCDSTUVABCDSTUV"); + // check trailers made it too + COMPARE(tr->headers["x-foobar"], string("wibble")); + } + // test 404 { TestRequest* tr = new TestRequest("http://localhost:2000/not-found"); @@ -303,29 +358,39 @@ int main(int argc, char* argv[]) waitForComplete(tr); COMPARE(tr->responseCode(), 404); COMPARE(tr->responseReason(), string("not found")); - COMPARE(tr->contentLength(), 0); + COMPARE(tr->responseLength(), 0); } - + +// test connectToHost failure +/* + { + TestRequest* tr = new TestRequest("http://not.found/something"); + HTTP::Request_ptr own(tr); + cl.makeRequest(tr); + waitForFailed(tr); + COMPARE(tr->responseCode(), -1); + } + */ // test proxy { - cl.setProxy("localhost:2000"); + cl.setProxy("localhost", 2000); TestRequest* tr = new TestRequest("http://www.google.com/test2"); HTTP::Request_ptr own(tr); cl.makeRequest(tr); waitForComplete(tr); COMPARE(tr->responseCode(), 200); - COMPARE(tr->contentLength(), body2Size); + COMPARE(tr->responseLength(), body2Size); COMPARE(tr->bodyData, string(body2, body2Size)); } { - 
cl.setProxy("localhost:2000", "ABCDEF"); + cl.setProxy("localhost", 2000, "ABCDEF"); TestRequest* tr = new TestRequest("http://www.google.com/test3"); HTTP::Request_ptr own(tr); cl.makeRequest(tr); waitForComplete(tr); COMPARE(tr->responseCode(), 200); - COMPARE(tr->contentLength(), body2Size); + COMPARE(tr->responseBytesReceived(), body2Size); COMPARE(tr->bodyData, string(body2, body2Size)); } diff --git a/simgear/misc/strutils.cxx b/simgear/misc/strutils.cxx index d4ff8393..8bd33b7f 100644 --- a/simgear/misc/strutils.cxx +++ b/simgear/misc/strutils.cxx @@ -22,11 +22,13 @@ #include #include +#include #include "strutils.hxx" using std::string; using std::vector; +using std::stringstream; namespace simgear { namespace strutils { @@ -243,9 +245,18 @@ namespace simgear { return result; } - int to_int(const std::string& s) + int to_int(const std::string& s, int base) { - return atoi(s.c_str()); + stringstream ss(s); + switch (base) { + case 8: ss >> std::oct; break; + case 16: ss >> std::hex; break; + default: break; + } + + int result; + ss >> result; + return result; } } // end namespace strutils diff --git a/simgear/misc/strutils.hxx b/simgear/misc/strutils.hxx index eeb1bfaa..1a0859d5 100644 --- a/simgear/misc/strutils.hxx +++ b/simgear/misc/strutils.hxx @@ -123,7 +123,10 @@ namespace simgear { */ std::string simplify(const std::string& s); - int to_int(const std::string& s); + /** + * convert a string representing a number in the given base (8, 10 or 16; any other base is treated as 10), to an int + */ + int to_int(const std::string& s, int base = 10); } // end namespace strutils } // end namespace simgear -- 2.39.5