# Call the AptPackages file_updated when files complete downloading.
# [quix0rs-apt-p2p.git] / apt_dht / MirrorManager.py
1
2 from urlparse import urlparse
3 import os
4
5 from twisted.python import log, filepath
6 from twisted.internet import defer
7 from twisted.trial import unittest
8 from twisted.web2 import stream
9 from twisted.web2.http import splitHostPort
10
11 from AptPackages import AptPackages
12
13 aptpkg_dir='.apt-dht'
14
class MirrorError(Exception):
    """Raised to signal a problem with a mirror request."""
17
class ProxyFileStream(stream.SimpleStream):
    """Saves a stream to a file while providing a new stream.

    Every piece of data read through this stream is also written to the
    output file.  When the input stream is exhausted the file is closed
    and C{doneDefer} fires with 1; if reading the input stream fails,
    the file is closed and C{doneDefer} fires with the failure.
    """
    
    def __init__(self, stream, outFile):
        """Initializes the proxy.
        
        @type stream: C{twisted.web2.stream.IByteStream}
        @param stream: the input stream to read from
        @type outFile: C{twisted.python.filepath.FilePath}
        @param outFile: the file to write to
        """
        self.stream = stream
        self.outFile = outFile.open('w')
        self.length = self.stream.length
        self.start = 0
        # Fires once the file has been completely written and closed
        # (callback with 1), or with the failure that aborted the read.
        self.doneDefer = defer.Deferred()

    def _done(self, failure=None):
        """Close the output file and fire C{doneDefer} exactly once.

        May be called directly (end of stream, close) or used as an
        errback.  The previous zero-argument version raised a TypeError
        when twisted invoked it as an errback with a Failure, and an
        error was never reported through C{doneDefer}'s errbacks.

        @param failure: the failure that aborted the stream, if any
        @return: C{failure}, so that when used as an errback the error
            continues to propagate to the stream's consumer
        """
        if not self.outFile.closed:
            self.outFile.close()
            if failure is None:
                self.doneDefer.callback(1)
            else:
                self.doneDefer.errback(failure)
        return failure
    
    def read(self):
        """Read some data from the stream.

        @return: the data read (also written to the file), a deferred
            that will fire with it, or C{None} once finished
        """
        if self.outFile.closed:
            return None
        
        data = self.stream.read()
        if isinstance(data, defer.Deferred):
            # _done doubles as the errback so that a failed read closes
            # the output file and is reported through doneDefer.
            data.addCallbacks(self._write, self._done)
            return data
        
        self._write(data)
        return data
    
    def _write(self, data):
        """Write the stream data to the file and return it for others to use.

        A C{None} value signals the end of the input stream.
        """
        if data is None:
            self._done()
            return data
        
        self.outFile.write(data)
        return data
    
    def close(self):
        """Clean everything up and return None to future reads."""
        self.length = 0
        self._done()
        self.stream.close()
68
class MirrorManager:
    """Manages all requests for mirror objects.

    Maintains one L{AptPackages} cache per (site, baseDir) pair in
    C{self.apt_caches}: a dict mapping C{site} (C{"host:port"}) to a
    dict mapping C{baseDir} to its L{AptPackages} instance.
    """
    
    def __init__(self, cache_dir):
        """Initialize with the directory used for all cached data.

        @param cache_dir: path under which downloaded files and the
            per-mirror apt package caches are stored
        """
        self.cache_dir = cache_dir
        self.cache = filepath.FilePath(self.cache_dir)
        # site -> {baseDir -> AptPackages}
        self.apt_caches = {}
    
    def extractPath(self, url):
        """Split a mirror URL into its components.

        @param url: the URL of a file on a mirror
        @return: a C{(site, baseDir, path)} tuple where C{site} is
            C{"host:port"}, C{baseDir} is the mirror's base directory
            (the part of the path before C{/dists/} or C{/pool/}), and
            C{path} is the remainder of the path
        """
        parsed = urlparse(url)
        host, port = splitHostPort(parsed[0], parsed[1])
        site = host + ":" + str(port)
        path = parsed[2]
            
        # Debian mirrors keep everything under dists/ and pool/, so the
        # last occurrence of either marks the end of the base directory.
        i = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if i >= 0:
            baseDir = path[:i]
            path = path[i:]
        else:
            # Uh oh, this is not good
            log.msg("Couldn't find a good base directory for path: %s" % (site + path))
            baseDir = ''
            if site in self.apt_caches:
                # Fall back to the longest known baseDir for this site
                # that is a prefix (component-wise) of the path.
                # NOTE(review): path starts with '/', so path.split('/')
                # yields a leading '' element and the first comparison is
                # against a bare '/' — confirm this is intended.
                longest_match = 0
                for base in self.apt_caches[site]:
                    base_match = ''
                    for dirs in path.split('/'):
                        if base.startswith(base_match + '/' + dirs):
                            base_match += '/' + dirs
                        else:
                            break
                    if len(base_match) > longest_match:
                        longest_match = len(base_match)
                        baseDir = base_match
            log.msg("Settled on baseDir: %s" % baseDir)
        
        return site, baseDir, path
        
    def init(self, site, baseDir):
        """Make sure an L{AptPackages} cache exists for (site, baseDir).

        The cache's backing directory is derived from the site and the
        baseDir with '/' replaced by '_'.
        """
        if site not in self.apt_caches:
            self.apt_caches[site] = {}
            
        if baseDir not in self.apt_caches[site]:
            site_cache = os.path.join(self.cache_dir, aptpkg_dir, 'mirrors', site + baseDir.replace('/', '_'))
            self.apt_caches[site][baseDir] = AptPackages(site_cache)
    
    def updatedFile(self, url, file_path):
        """Notify the appropriate AptPackages cache of an updated file.

        @param url: the URL the file was downloaded from
        @param file_path: the local path of the downloaded file
        """
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, file_path)
    
    def findHash(self, url):
        """Find the hash for a URL from the mirror's package indexes.

        @return: a deferred that fires with the hash information, or
            errbacks with L{MirrorError} if the site/baseDir is unknown
        """
        site, baseDir, path = self.extractPath(url)
        if site in self.apt_caches and baseDir in self.apt_caches[site]:
            return self.apt_caches[site][baseDir].findHash(path)
        d = defer.Deferred()
        d.errback(MirrorError("Site Not Found"))
        return d

    def save_file(self, response, hash, size, url):
        """Save a downloaded file to the cache and stream it.

        Wraps the response's stream in a L{ProxyFileStream} so the data
        is written to the cache while being passed through to the
        requester; C{save_complete}/C{save_error} run when it finishes.

        @param response: the twisted.web2 response whose stream to save
        @return: the same response, with its stream replaced
        """
        log.msg('Returning file: %s' % url)
        
        parsed = urlparse(url)
        # Cache location mirrors the URL: <cache>/<netloc><path>
        destFile = self.cache.preauthChild(parsed[1] + parsed[2])
        log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path))
        
        if destFile.exists():
            log.msg('File already exists, removing: %s' % destFile.path)
            destFile.remove()
        else:
            destFile.parent().makedirs()
        
        orig_stream = response.stream
        response.stream = ProxyFileStream(orig_stream, destFile)
        response.stream.doneDefer.addCallback(self.save_complete, url, destFile,
                                              response.headers.getHeader('Last-Modified'))
        response.stream.doneDefer.addErrback(self.save_error, url)
        return response

    def save_complete(self, result, url, destFile, modtime = None):
        """Update the modification time and AptPackages.

        @param modtime: seconds-since-epoch modification time from the
            Last-Modified header, if the server sent one
        """
        if modtime:
            # Set both atime and mtime to the server's timestamp.
            os.utime(destFile.path, (modtime, modtime))
            
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, destFile.path)

    def save_error(self, failure, url):
        """An error has occurred in downloading or saving the file."""
        log.msg('Error occurred downloading %s' % url)
        log.err(failure)
        # Re-raise so further errbacks (if any) also see the failure.
        return failure
163
class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager.

    The findHash test requires a populated /var/lib/apt/lists directory
    (i.e. a Debian-like host that has run 'apt-get update') — it shells
    out to inspect the local Release/Packages/Sources index files.
    """
    
    timeout = 20
    # NOTE(review): class-level mutable list shared by all instances;
    # only read in tearDown here, but an instance attribute would be safer.
    pending_calls = []
    client = None
    
    def setUp(self):
        self.client = MirrorManager('/tmp')
        
    def test_extractPath(self):
        # Standard mirror layout: baseDir before /dists/.
        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
        self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

        # Explicit port is kept in the site; baseDir before /pool/.
        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
        self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)

        # Mirror rooted at '/': empty baseDir.
        site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
        self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

    def verifyHash(self, found_hash, path, true_hash):
        """Callback: assert that the hash found for path matches."""
        self.failUnless(found_hash[0] == true_hash, 
                    "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash))

    def test_findHash(self):
        # Pick the largest main Packages and Sources index files that
        # apt has already downloaded locally.
        self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
        self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
        # Find the Release file belonging to that Packages file (apt
        # encodes the URL in the filename with '_' separators).
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break
        
        # Feed the three index files to the MirrorManager, reconstructing
        # each file's original URL from its apt list filename.
        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'), 
                                '/var/lib/apt/lists/' + self.releaseFile)
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'), 
                                '/var/lib/apt/lists/' + self.packagesFile)
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'), 
                                '/var/lib/apt/lists/' + self.sourcesFile)

        lastDefer = defer.Deferred()
        
        # Extract the SHA1 of the Packages.bz2 index from the Release
        # file and check findHash agrees.
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + 
                            '/var/lib/apt/lists/' + self.releaseFile + 
                            ' | grep -E " main/binary-i386/Packages.bz2$"'
                            ' | head -n 1 | cut -d\  -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Extract the SHA1 and Filename of the dpkg package from the
        # Packages file and check findHash agrees.
        pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.packagesFile + 
                            ' | grep -E "^SHA1:" | head -n 1' + 
                            ' | cut -d\  -f 2').read().rstrip('\n')
        pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
                   os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.packagesFile + 
                            ' | grep -E "^Filename:" | head -n 1' + 
                            ' | cut -d\  -f 2').read().rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        # Extract the source files (hashes and names) of dpkg from the
        # Sources file and check findHash agrees for each of them.
        src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.sourcesFile + 
                            ' | grep -E "^Directory:" | head -n 1' + 
                            ' | cut -d\  -f 2').read().rstrip('\n')
        src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.sourcesFile + 
                            ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + 
                            ' | cut -d\  -f 2').read().split('\n')[:-1]
        src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.sourcesFile + 
                            ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + 
                            ' | cut -d\  -f 4').read().split('\n')[:-1]

        for i in range(len(src_hashes)):
            src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])
            
        # Same as the Packages.bz2 check above, for Sources.bz2.
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + 
                            '/var/lib/apt/lists/' + self.releaseFile + 
                            ' | grep -E " main/source/Sources.bz2$"'
                            ' | head -n 1 | cut -d\  -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Chain the final lookup into lastDefer so trial waits for it.
        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        # Cancel any outstanding delayed calls so trial exits cleanly.
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.client = None
270