import os

from urlparse import urlparse

from twisted.python import log, filepath
from twisted.internet import defer
from twisted.trial import unittest
from twisted.web2 import stream
from twisted.web2.http import splitHostPort

from AptPackages import AptPackages

class MirrorError(Exception):
    """Exception raised when there's a problem with the mirror."""

class ProxyFileStream(stream.SimpleStream):
    """Saves a stream to a file while providing a new stream."""

    def __init__(self, stream, outFile):
        """Initializes the proxy.
        
        @type stream: C{twisted.web2.stream.IByteStream}
        @param stream: the input stream to read from
        @type outFile: C{twisted.python.filepath.FilePath}
        @param outFile: the file to write to
        """
        self.stream = stream
        self.outFile = outFile.open('w')
        self.length = self.stream.length
        self.doneDefer = defer.Deferred()

    def _done(self, result=None):
        """Close the output file."""
        # 'result' is accepted so this can also be used as a Deferred errback
        if not self.outFile.closed:
            self.outFile.close()
            self.doneDefer.callback(1)
42 """Read some data from the stream."""
43 if self.outFile.closed:
46 data = self.stream.read()
47 if isinstance(data, defer.Deferred):
48 data.addCallbacks(self._write, self._done)
54 def _write(self, data):
55 """Write the stream data to the file and return it for others to use."""
60 self.outFile.write(data)
64 """Clean everything up and return None to future reads."""
70 """Manages all requests for mirror objects."""
72 def __init__(self, cache_dir):
73 self.cache_dir = cache_dir
74 self.cache = filepath.FilePath(self.cache_dir)

    def extractPath(self, url):
        """Split a mirror URL into its site, base directory and path."""
        parsed = urlparse(url)
        host, port = splitHostPort(parsed[0], parsed[1])
        site = host + ":" + str(port)
        path = parsed[2]

        i = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if i >= 0:
            baseDir = path[:i]
            path = path[i:]
        else:
            # Uh oh, this is not good
            log.msg("Couldn't find a good base directory for path: %s" % (site + path))
            baseDir = ''
            if site in self.apt_caches:
                # Fall back to the longest known base directory that prefixes the path
                longest_match = 0
                for base in self.apt_caches[site]:
                    base_match = ''
                    for dirs in path.split('/'):
                        if base.startswith(base_match + '/' + dirs):
                            base_match += '/' + dirs
                        else:
                            break
                    if len(base_match) > longest_match:
                        longest_match = len(base_match)
                        baseDir = base_match
            log.msg("Settled on baseDir: %s" % baseDir)

        return site, baseDir, path
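
    # Example results (taken from test_extractPath below):
    #   extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
    #       -> ('ftp.us.debian.org:80', '/debian', '/dists/unstable/Release')
    #   extractPath('http://debian.camrdale.org/dists/unstable/Release')
    #       -> ('debian.camrdale.org:80', '', '/dists/unstable/Release')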

    def init(self, site, baseDir):
        """Make sure an AptPackages cache exists for this site and base directory."""
        if site not in self.apt_caches:
            self.apt_caches[site] = {}

        if baseDir not in self.apt_caches[site]:
            # aptpkg_dir is the cache subdirectory name, defined elsewhere in the module
            site_cache = os.path.join(self.cache_dir, aptpkg_dir, 'mirrors', site + baseDir.replace('/', '_'))
            self.apt_caches[site][baseDir] = AptPackages(site_cache)

    def updatedFile(self, url, file_path):
        """Pass a newly downloaded file on to the cache for its site and base directory."""
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, file_path)

    def findHash(self, url):
        """Find the hash for a URL, returning a deferred that fires with the result."""
        site, baseDir, path = self.extractPath(url)
        if site in self.apt_caches and baseDir in self.apt_caches[site]:
            return self.apt_caches[site][baseDir].findHash(path)
        d = defer.Deferred()
        d.errback(MirrorError("Site Not Found"))
        return d
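
    # Illustrative usage sketch (not part of the original module): findHash
    # returns a deferred, so callers attach a callback for the found hash and
    # an errback for an unknown site.  'gotHash' and 'siteNotFound' are
    # assumed placeholder names.
    #
    #   d = manager.findHash('http://ftp.us.debian.org/debian/dists/unstable/Release')
    #   d.addCallbacks(gotHash, siteNotFound)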

    def save_file(self, response, hash, size, url):
        """Save a downloaded file to the cache and stream it."""
        log.msg('Returning file: %s' % url)

        parsed = urlparse(url)
        destFile = self.cache.preauthChild(parsed[1] + parsed[2])
        log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path))

        if destFile.exists():
            log.msg('File already exists, removing: %s' % destFile.path)
            destFile.remove()
        else:
            destFile.parent().makedirs()

        orig_stream = response.stream
        response.stream = ProxyFileStream(orig_stream, destFile)
        response.stream.doneDefer.addCallback(self.save_complete, url, destFile,
                                              response.headers.getHeader('Last-Modified'))
        response.stream.doneDefer.addErrback(self.save_error, url)
        return response

    def save_complete(self, result, url, destFile, modtime=None):
        """Update the modification time and the AptPackages cache."""
        if modtime:
            os.utime(destFile.path, (modtime, modtime))

        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, destFile.path)

    def save_error(self, failure, url):
        """An error has occurred in downloading or saving the file."""
        log.msg('Error occurred downloading %s' % url)
        log.err(failure)
        # Pass the failure on so later errbacks see it too
        return failure
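
# Illustrative flow sketch (not part of the original module), assuming
# 'manager' is a MirrorManager and 'response' is a twisted.web2 response for
# 'url': save_file caches the body as it is streamed back to the requester,
# and save_complete / save_error fire from doneDefer once the stream ends.
#
#   response = manager.save_file(response, hash, size, url)
#   # ... return the (now proxied) response to the requesting client as usual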

class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager."""

    def setUp(self):
        self.pending_calls = []
        self.client = MirrorManager('/tmp')

    def test_extractPath(self):
        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
        self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
        self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
        self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

    def verifyHash(self, found_hash, path, true_hash):
        self.failUnless(found_hash[0] == true_hash,
                        "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash))

    def test_findHash(self):
        self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
        self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
                                '/var/lib/apt/lists/' + self.releaseFile)
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
                                '/var/lib/apt/lists/' + self.packagesFile)
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
                                '/var/lib/apt/lists/' + self.sourcesFile)

        lastDefer = defer.Deferred()

        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/binary-i386/Packages.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^SHA1:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')
        pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
                   os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^Filename:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -E "^Directory:" | head -n 1' +
                           ' | cut -d\ -f 2').read().rstrip('\n')
        src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                              '/var/lib/apt/lists/' + self.sourcesFile +
                              ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                              ' | cut -d\ -f 2').read().split('\n')[:-1]
        src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                             '/var/lib/apt/lists/' + self.sourcesFile +
                             ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                             ' | cut -d\ -f 4').read().split('\n')[:-1]

        for i in range(len(src_hashes)):
            src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])

        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/source/Sources.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        # Cancel any outstanding delayed calls
        for p in self.pending_calls:
            p.cancel()