]> git.mxchange.org Git - quix0rs-apt-p2p.git/blob - HTTPDownloader.py
Get it working and add trial unit testing.
[quix0rs-apt-p2p.git] / HTTPDownloader.py
1
2 from twisted.internet import reactor, defer, protocol
3 from twisted.internet.protocol import ClientFactory
4 from twisted.web2.client.interfaces import IHTTPClientManager
5 from twisted.web2.client.http import ProtocolError, ClientRequest, HTTPClientProtocol
6 from twisted.trial import unittest
7 from zope.interface import implements
8
class HTTPClientManager(ClientFactory):
    """A manager for all HTTP requests to a single site.

    Requests handed to submitRequest() are kept in ``request_queue`` until
    a connection is available, then passed to the HTTPClientProtocol and
    moved to ``response_queue`` until their response (or error) arrives.
    The connection is opened lazily, the first time a request is queued
    while the manager is closed.

    Unless the protocol has reported (through clientPipelining) that it
    accepts pipelined requests, only one request is outstanding at a time.
    """

    implements(IHTTPClientManager)

    def __init__(self, host, port):
        """Remember the site to talk to; no connection is opened yet.

        @param host: the host name or address to connect to
        @param port: the TCP port to connect to
        """
        self.host = host
        self.port = port
        self.busy = False
        self.pipeline = False
        self.closed = True
        self.connecting = False
        # Requests waiting to be sent, oldest first.
        self.request_queue = []
        # Requests already sent that are still waiting for a response.
        self.response_queue = []
        self.proto = None
        self.connector = None

    def connect(self):
        """Start a TCP connection to the site.

        Must only be called while the manager is closed and no other
        connection attempt is in progress.
        """
        assert(self.closed and not self.connecting)
        self.connecting = True
        d = protocol.ClientCreator(reactor, HTTPClientProtocol, self).connectTCP(self.host, self.port)
        # A failed attempt must be noticed too, otherwise 'connecting'
        # would stay set forever and every queued request would hang.
        d.addCallbacks(self.connected, self._connectionFailed)

    def connected(self, proto):
        """Record the freshly connected protocol and start sending requests.

        @param proto: the connected HTTPClientProtocol instance
        """
        self.closed = False
        self.connecting = False
        self.proto = proto
        self.processQueue()

    def _connectionFailed(self, failure):
        """Fail every queued request after a connection attempt failed.

        @param failure: the reason the connection could not be made
        """
        self.connecting = False
        # Detach the queue first so that errbacks which submit new
        # requests start from a clean state (and a new connection attempt).
        waiting = self.request_queue
        self.request_queue = []
        for req in waiting:
            req.deferRequest.errback(failure)

    def close(self):
        """Drop the current connection, if there is one."""
        if not self.closed:
            self.proto.transport.loseConnection()

    def submitRequest(self, request):
        """Queue a request for the site.

        @param request: the ClientRequest to send
        @return: a Deferred that fires with the response, or fails if the
            request could not be completed
        """
        request.deferRequest = defer.Deferred()
        self.request_queue.append(request)
        self.processQueue()
        return request.deferRequest

    def processQueue(self):
        """Send the next queued request if the connection can take it."""
        if not self.request_queue:
            return
        if self.connecting:
            return
        if self.closed:
            self.connect()
            return
        # Without pipelining only a single request may be in flight.
        if not self.pipeline and (self.busy or self.response_queue):
            return

        req = self.request_queue.pop(0)
        self.response_queue.append(req)
        req.deferResponse = self.proto.submitRequest(req, False)
        # Bind the request to its own handlers so that a result is always
        # delivered to the request it belongs to.  addCallbacks (rather
        # than addCallback + addErrback) keeps an exception raised in
        # requestComplete from being routed into requestError as well.
        req.deferResponse.addCallbacks(self.requestComplete, self.requestError,
                                       callbackArgs=(req,), errbackArgs=(req,))

    def _takeOutstanding(self, req=None):
        """Remove and return the outstanding request a result belongs to.

        @param req: the request the result is for, or None to take the
            oldest outstanding request
        @return: the request, or None if it is no longer outstanding
            (for example because clientGone already failed it)
        """
        if req is None:
            if not self.response_queue:
                return None
            return self.response_queue.pop(0)
        try:
            self.response_queue.remove(req)
        except ValueError:
            return None
        return req

    def requestComplete(self, resp, req=None):
        """Deliver a response to the request that was waiting for it.

        @param resp: the response produced by the protocol
        @param req: the request the response belongs to; when omitted the
            oldest outstanding request is used
        """
        req = self._takeOutstanding(req)
        if req is not None:
            req.deferRequest.callback(resp)

    def requestError(self, error, req=None):
        """Deliver an error to the request that was waiting for a response.

        @param error: the failure reported by the protocol
        @param req: the request the failure belongs to; when omitted the
            oldest outstanding request is used
        """
        req = self._takeOutstanding(req)
        if req is not None:
            req.deferRequest.errback(error)

    def clientBusy(self, proto):
        """Note that the protocol cannot accept another request right now."""
        self.busy = True

    def clientIdle(self, proto):
        """Note that the protocol is idle and send the next request."""
        self.busy = False
        self.processQueue()

    def clientPipelining(self, proto):
        """Note that the protocol accepts pipelined requests and send one."""
        self.pipeline = True
        self.processQueue()

    def clientGone(self, proto):
        """Handle the loss of the connection.

        Every request still waiting for a response is failed with a
        ProtocolError; requests that were not sent yet stay queued and
        trigger a new connection.
        """
        lost = self.response_queue
        # Reset all connection state before firing any errback, so that a
        # handler which submits a new request never sees the dead protocol.
        self.busy = False
        self.pipeline = False
        self.closed = True
        self.connecting = False
        self.response_queue = []
        self.proto = None
        for req in lost:
            req.deferRequest.errback(ProtocolError('lost connection'))
        if self.request_queue:
            self.processQueue()
100
class TestDownloader(unittest.TestCase):
    """Trial tests that drive HTTPClientManager against a live web server.

    NOTE: these tests need network access to www.camrdale.org, and the
    expected lengths are whatever that site served when they were written.
    """

    client = None

    def setUp(self):
        # A fresh list per test; a class-level list would be shared (and
        # keep growing) across every test instance.
        self.pending_calls = []

    def gotResp(self, resp, num, expect):
        """Check a response for a 2xx code and the expected body length.

        @param resp: the response to check
        @param num: the sequence number of the request (informational only)
        @param expect: the expected length of the response stream
        """
        self.failUnless(resp.code >= 200 and resp.code < 300, "Got a non-200 response: %r" % resp.code)
        self.failUnless(resp.stream.length == expect, "Length was incorrect, got %r, expected %r" % (resp.stream.length, expect))
        resp.stream.close()

    def test_download(self):
        """Fetch a single small file with GET."""
        host = 'www.camrdale.org'
        self.client = HTTPClientManager(host, 80)
        self.timeout = 10
        lastDefer = defer.Deferred()

        d = self.client.submitRequest(ClientRequest("GET", '/robots.txt', {'Host':host}, None))
        d.addCallback(self.gotResp, 1, 309)
        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_head(self):
        """Issue a HEAD request, which must come back with an empty body."""
        host = 'www.camrdale.org'
        self.client = HTTPClientManager(host, 80)
        self.timeout = 10
        lastDefer = defer.Deferred()

        d = self.client.submitRequest(ClientRequest("HEAD", '/robots.txt', {'Host':host}, None))
        d.addCallback(self.gotResp, 1, 0)
        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_multiple_downloads(self):
        """Submit several requests, some at once and some after delays."""
        host = 'www.camrdale.org'
        self.client = HTTPClientManager(host, 80)
        self.timeout = 120
        lastDefer = defer.Deferred()

        def finish(result):
            # Fire the test's Deferred exactly once: with the outcome of
            # the last request, or with the first failure seen before it.
            if not lastDefer.called:
                lastDefer.callback(result)

        def newRequest(path, num, expect, last=False):
            d = self.client.submitRequest(ClientRequest("GET", path, {'Host':host}, None))
            d.addCallback(self.gotResp, num, expect)
            if last:
                # addBoth, so a failing last request ends the test at
                # once instead of leaving it to run into the timeout.
                d.addBoth(finish)
            else:
                # Report a failing earlier request instead of dropping it.
                d.addErrback(finish)

        newRequest("/", 1, 3433)
        newRequest("/blog/", 2, 37121)
        newRequest("/camrdale.html", 3, 2234)
        self.pending_calls.append(reactor.callLater(1, newRequest, '/robots.txt', 4, 309))
        self.pending_calls.append(reactor.callLater(10, newRequest, '/wikilink.html', 5, 3084))
        self.pending_calls.append(reactor.callLater(30, newRequest, '/sitemap.html', 6, 4750))
        self.pending_calls.append(reactor.callLater(31, newRequest, '/PlanetLab.html', 7, 2783))
        self.pending_calls.append(reactor.callLater(32, newRequest, '/openid.html', 8, 2525))
        self.pending_calls.append(reactor.callLater(32, newRequest, '/subpage.html', 9, 2381))
        self.pending_calls.append(reactor.callLater(62, newRequest, '/sitemap2.rss', 0, 302362, True))
        return lastDefer

    def test_range(self):
        """Request a 100 byte range of a file."""
        host = 'www.camrdale.org'
        self.client = HTTPClientManager(host, 80)
        self.timeout = 10
        lastDefer = defer.Deferred()

        d = self.client.submitRequest(ClientRequest("GET", '/robots.txt', {'Host':host, 'Range': ('bytes', [(100, 199)])}, None))
        d.addCallback(self.gotResp, 1, 100)
        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        """Cancel delayed requests that have not run and drop the connection."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        if self.client:
            self.client.close()
            self.client = None