2 from twisted.internet import reactor, defer, protocol
3 from twisted.internet.protocol import ClientFactory
4 from twisted.web2.client.interfaces import IHTTPClientManager
5 from twisted.web2.client.http import ProtocolError, ClientRequest, HTTPClientProtocol
6 from twisted.trial import unittest
7 from zope.interface import implements
9 class HTTPClientManager(ClientFactory):
10 """A manager for all HTTP requests to a single site.
12 Controls all requests that go to a single site (host and port).
13 This includes buffering requests until they can be sent and reconnecting
14 in the event of the connection being closed.
18 implements(IHTTPClientManager)
20 def __init__(self, host, port):
26 self.connecting = False
27 self.request_queue = []
28 self.response_queue = []
33 assert(self.closed and not self.connecting)
34 self.connecting = True
35 d = protocol.ClientCreator(reactor, HTTPClientProtocol, self).connectTCP(self.host, self.port)
36 d.addCallback(self.connected)
38 def connected(self, proto):
40 self.connecting = False
46 self.proto.transport.loseConnection()
49 return not self.busy and not self.request_queue and not self.response_queue
51 def submitRequest(self, request):
52 request.deferRequest = defer.Deferred()
53 self.request_queue.append(request)
55 return request.deferRequest
57 def processQueue(self):
58 if not self.request_queue:
65 if self.busy and not self.pipeline:
67 if self.response_queue and not self.pipeline:
70 req = self.request_queue.pop(0)
71 self.response_queue.append(req)
72 req.deferResponse = self.proto.submitRequest(req, False)
73 req.deferResponse.addCallback(self.requestComplete)
74 req.deferResponse.addErrback(self.requestError)
def requestComplete(self, resp):
    """Deliver a finished response to the oldest in-flight request.

    processQueue() appends each request to ``response_queue`` as it is
    submitted, so the request at the head of the queue is treated as the
    owner of this response and its ``deferRequest`` is fired with it.

    Does nothing when no request is waiting.
    """
    if not self.response_queue:
        # clientGone() fails every outstanding request and empties the
        # queue, so a response that shows up afterwards has no owner.
        # Ignore it instead of raising IndexError in the callback chain.
        return
    req = self.response_queue.pop(0)
    req.deferRequest.callback(resp)
def requestError(self, error):
    """Report a failed request to the oldest in-flight request's caller.

    The request at the head of ``response_queue`` is removed and its
    ``deferRequest`` is errbacked with ``error``.  Returning None marks
    the failure as handled on the protocol's own deferred.

    Does nothing when no request is waiting.
    """
    if not self.response_queue:
        # clientGone() has already errbacked every outstanding request
        # and cleared the queue; a later error has nobody left to notify,
        # and pop(0) on the empty list would raise IndexError.
        return
    req = self.response_queue.pop(0)
    req.deferRequest.errback(error)
84 def clientBusy(self, proto):
87 def clientIdle(self, proto):
91 def clientPipelining(self, proto):
95 def clientGone(self, proto):
96 for req in self.response_queue:
97 req.deferRequest.errback(ProtocolError('lost connection'))
101 self.connecting = False
102 self.response_queue = []
104 if self.request_queue:
107 class HTTPDownloader:
108 """Manages all the HTTP connections to various sites."""
def get(self, host, port, request):
    """Submit ``request`` through the manager responsible for host:port.

    Managers are kept in ``self.clients`` under the key "host:port".  The
    first request for a site creates its HTTPClientManager; later requests
    reuse it.  Returns whatever the manager's submitRequest() returns.
    """
    key = ":".join((host, str(port)))
    try:
        manager = self.clients[key]
    except KeyError:
        # First request for this site: create and remember its manager.
        manager = HTTPClientManager(host, port)
        self.clients[key] = manager
    return manager.submitRequest(request)
120 for site in self.clients:
121 self.clients[site].close()
124 class TestClientManager(unittest.TestCase):
125 """Unit tests for the HTTPClientManager."""
130 def gotResp(self, resp, num, expect):
131 self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code)
132 self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect))
135 def test_download(self):
136 host = 'www.camrdale.org'
137 self.client = HTTPClientManager(host, 80)
139 lastDefer = defer.Deferred()
141 d = self.client.submitRequest(ClientRequest("GET", '/robots.txt', {'Host':host}, None))
142 d.addCallback(self.gotResp, 1, 309)
143 d.addBoth(lastDefer.callback)
147 host = 'www.camrdale.org'
148 self.client = HTTPClientManager(host, 80)
150 lastDefer = defer.Deferred()
152 d = self.client.submitRequest(ClientRequest("HEAD", '/robots.txt', {'Host':host}, None))
153 d.addCallback(self.gotResp, 1, 0)
154 d.addBoth(lastDefer.callback)
157 def test_multiple_downloads(self):
158 host = 'www.camrdale.org'
159 self.client = HTTPClientManager(host, 80)
161 lastDefer = defer.Deferred()
163 def newRequest(path, num, expect, last=False):
164 d = self.client.submitRequest(ClientRequest("GET", path, {'Host':host}, None))
165 d.addCallback(self.gotResp, num, expect)
167 d.addCallback(lastDefer.callback)
169 newRequest("/", 1, 3433)
170 newRequest("/blog/", 2, 37121)
171 newRequest("/camrdale.html", 3, 2234)
172 self.pending_calls.append(reactor.callLater(1, newRequest, '/robots.txt', 4, 309))
173 self.pending_calls.append(reactor.callLater(10, newRequest, '/wikilink.html', 5, 3084))
174 self.pending_calls.append(reactor.callLater(30, newRequest, '/sitemap.html', 6, 4750))
175 self.pending_calls.append(reactor.callLater(31, newRequest, '/PlanetLab.html', 7, 2783))
176 self.pending_calls.append(reactor.callLater(32, newRequest, '/openid.html', 8, 2525))
177 self.pending_calls.append(reactor.callLater(32, newRequest, '/subpage.html', 9, 2381))
178 self.pending_calls.append(reactor.callLater(62, newRequest, '/sitemap2.rss', 0, 302362, True))
181 def test_range(self):
182 host = 'www.camrdale.org'
183 self.client = HTTPClientManager(host, 80)
185 lastDefer = defer.Deferred()
187 d = self.client.submitRequest(ClientRequest("GET", '/robots.txt', {'Host':host, 'Range': ('bytes', [(100, 199)])}, None))
188 d.addCallback(self.gotResp, 1, 100)
189 d.addBoth(lastDefer.callback)
193 for p in self.pending_calls:
196 self.pending_calls = []
201 class TestDownloader(unittest.TestCase):
202 """Unit tests for the HTTPDownloader."""
207 def gotResp(self, resp, num, expect):
208 self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code)
209 if expect is not None:
210 self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect))
213 def test_download(self):
214 self.manager = HTTPDownloader()
216 lastDefer = defer.Deferred()
218 host = 'www.camrdale.org'
219 d = self.manager.get(host, 80, ClientRequest("GET", '/robots.txt', {'Host':host}, None))
220 d.addCallback(self.gotResp, 1, 309)
221 d.addBoth(lastDefer.callback)
225 self.manager = HTTPDownloader()
227 lastDefer = defer.Deferred()
229 host = 'www.camrdale.org'
230 d = self.manager.get(host, 80, ClientRequest("HEAD", '/robots.txt', {'Host':host}, None))
231 d.addCallback(self.gotResp, 1, 0)
232 d.addBoth(lastDefer.callback)
235 def test_multiple_downloads(self):
236 self.manager = HTTPDownloader()
238 lastDefer = defer.Deferred()
240 def newRequest(host, path, num, expect, last=False):
241 d = self.manager.get(host, 80, ClientRequest("GET", path, {'Host':host}, None))
242 d.addCallback(self.gotResp, num, expect)
244 d.addCallback(lastDefer.callback)
246 newRequest('www.camrdale.org', "/", 1, 3433)
247 newRequest('www.camrdale.org', "/blog/", 2, 37121)
248 newRequest('www.google.ca', "/", 3, None)
249 self.pending_calls.append(reactor.callLater(1, newRequest, 'www.sfu.ca', '/', 4, None))
250 self.pending_calls.append(reactor.callLater(10, newRequest, 'www.camrdale.org', '/wikilink.html', 5, 3084))
251 self.pending_calls.append(reactor.callLater(30, newRequest, 'www.camrdale.org', '/sitemap.html', 6, 4750))
252 self.pending_calls.append(reactor.callLater(31, newRequest, 'www.sfu.ca', '/studentcentral/index.html', 7, None))
253 self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/openid.html', 8, 2525))
254 self.pending_calls.append(reactor.callLater(32, newRequest, 'www.camrdale.org', '/subpage.html', 9, 2381))
255 self.pending_calls.append(reactor.callLater(62, newRequest, 'www.google.ca', '/intl/en/options/', 0, None, True))
258 def test_range(self):
259 self.manager = HTTPDownloader()
261 lastDefer = defer.Deferred()
263 host = 'www.camrdale.org'
264 d = self.manager.get(host, 80, ClientRequest("GET", '/robots.txt', {'Host':host, 'Range': ('bytes', [(100, 199)])}, None))
265 d.addCallback(self.gotResp, 1, 100)
266 d.addBoth(lastDefer.callback)
270 for p in self.pending_calls:
273 self.pending_calls = []
275 self.manager.closeAll()