import os
from urlparse import urlparse

from twisted.python import log, filepath
from twisted.internet import defer
from twisted.trial import unittest
from twisted.web2 import stream
from twisted.web2.http import splitHostPort

from AptPackages import AptPackages

# Name of the cache subdirectory used for AptPackages data
# (this particular name is an assumption)
aptpkg_dir = '.apt-dht'

class MirrorError(Exception):
    """Exception raised when there's a problem with the mirror."""

class ProxyFileStream(stream.SimpleStream):
    """Saves a stream to a file while providing a new stream."""

    def __init__(self, stream, outFile, modtime = None):
        """Initializes the proxy.

        @type stream: C{twisted.web2.stream.IByteStream}
        @param stream: the input stream to read from
        @type outFile: C{twisted.python.filepath.FilePath}
        @param outFile: the file to write to
        @type modtime: C{int}
        @param modtime: the modification time to set for the file
        """
        self.stream = stream
        self.outFile = outFile
        self.openFile = outFile.open('w')
        self.modtime = modtime
        self.length = self.stream.length
39 """Close the output file."""
40 if not self.openFile.closed:
43 os.utime(self.outFile.path, (self.modtime, self.modtime))
46 """Read some data from the stream."""
47 if self.openFile.closed:
50 data = self.stream.read()
51 if isinstance(data, defer.Deferred):
52 data.addCallbacks(self._write, self._done)

    def _write(self, data):
        """Write the stream data to the file and return it for others to use."""
        if data is None:
            self._done()
            return data

        self.openFile.write(data)
        return data

    def close(self):
        """Clean everything up and return None to future reads."""
        self.length = 0
        self._done()
        self.stream.close()
74 """Manages all requests for mirror objects."""
76 def __init__(self, cache_dir):
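        """Initialize the manager with the directory to cache files in."""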
        self.cache_dir = cache_dir
        self.cache = filepath.FilePath(self.cache_dir)
        self.apt_caches = {}

    def extractPath(self, url):
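        """Break a full URI into site, base directory, and path.

        The site is the mirror's host and port. The base directory is
        the mirror's root directory (e.g. '/debian'), found by locating
        the '/dists/' or '/pool/' component of the path; the path is
        everything from that component on. If neither is present, the
        longest matching base directory already seen for this site is
        used as a fallback.
        """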
        parsed = urlparse(url)
        host, port = splitHostPort(parsed[0], parsed[1])
        site = host + ":" + str(port)
        path = parsed[2]

        i = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if i >= 0:
            baseDir = path[:i]
            path = path[i:]
        else:
            # Uh oh, this is not good
            log.msg("Couldn't find a good base directory for path: %s" % (site + path))
            baseDir = ''
            if site in self.apt_caches:
                # Fall back to the longest known base directory that is
                # a prefix of the path
                longest_match = 0
                for base in self.apt_caches[site]:
                    base_match = ''
                    for dirs in path.split('/'):
                        if base.startswith(base_match + '/' + dirs):
                            base_match += '/' + dirs
                        else:
                            break
                    if len(base_match) > longest_match:
                        longest_match = len(base_match)
                        baseDir = base_match
            log.msg("Settled on baseDir: %s" % baseDir)

        return site, baseDir, path

    def init(self, site, baseDir):
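        """Make sure an AptPackages cache exists for this site and base directory."""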
        if site not in self.apt_caches:
            self.apt_caches[site] = {}

        if baseDir not in self.apt_caches[site]:
            site_cache = os.path.join(self.cache_dir, aptpkg_dir, 'mirrors', site + baseDir.replace('/', '_'))
            self.apt_caches[site][baseDir] = AptPackages(site_cache)

    def updatedFile(self, url, file_path):
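        """Notify the appropriate AptPackages cache that a file has been updated."""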
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, file_path)

    def findHash(self, url):
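        """Find the hash for a URL's file, returning a deferred.

        The deferred errbacks with a C{MirrorError} if the URL's site
        and base directory are not known.
        """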
        site, baseDir, path = self.extractPath(url)
        if site in self.apt_caches and baseDir in self.apt_caches[site]:
            return self.apt_caches[site][baseDir].findHash(path)
        d = defer.Deferred()
        d.errback(MirrorError("Site Not Found"))
        return d

    def save_file(self, response, hash, size, url):
        """Save a downloaded file to the cache and stream it."""
        log.msg('Returning file: %s' % url)

        parsed = urlparse(url)
        destFile = self.cache.preauthChild(parsed[1] + parsed[2])
        log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path))

        if destFile.exists():
            log.msg('File already exists, removing: %s' % destFile.path)
            destFile.remove()
        else:
            destFile.parent().makedirs()
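
        # Wrap the response stream so the file is written to the cache
        # as it is streamed back to the requester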
        orig_stream = response.stream
        response.stream = ProxyFileStream(orig_stream, destFile, response.headers.getHeader('Last-Modified'))
        return response

    def save_error(self, failure, url):
        """An error has occurred in downloading or saving the file."""
        log.msg('Error occurred downloading %s' % url)
        return failure

class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager."""

    pending_calls = []
    client = None

    def setUp(self):
        self.client = MirrorManager('/tmp')

    def test_extractPath(self):
        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
        self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
        self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
        self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

    def verifyHash(self, found_hash, path, true_hash):
        self.failUnless(found_hash[0] == true_hash,
                        "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash))

    def test_findHash(self):
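        # Use the apt indexes already on this system as test data: the
        # largest Packages and Sources files in the apt cache, plus the
        # Release file that lists their hashes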
        self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
        self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
                                '/var/lib/apt/lists/' + self.releaseFile)
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
                                '/var/lib/apt/lists/' + self.packagesFile)
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
                                '/var/lib/apt/lists/' + self.sourcesFile)
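
        # Fired by the final findHash check below, so that trial waits
        # for the test to finish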
        lastDefer = defer.Deferred()

        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/binary-i386/Packages.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^SHA1:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')
        pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
                   os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^Filename:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -E "^Directory:" | head -n 1' +
                           ' | cut -d\ -f 2').read().rstrip('\n')
        src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                              '/var/lib/apt/lists/' + self.sourcesFile +
                              ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                              ' | cut -d\ -f 2').read().split('\n')[:-1]
        src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                             '/var/lib/apt/lists/' + self.sourcesFile +
                             ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                             ' | cut -d\ -f 4').read().split('\n')[:-1]

        for i in range(len(src_hashes)):
            src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])

        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/source/Sources.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.client = None