2 """Manage the multiple mirrors that may be requested.
4 @var aptpkg_dir: the name of the directory to use for mirror files
7 from urlparse import urlparse
10 from twisted.python import log
11 from twisted.python.filepath import FilePath
12 from twisted.internet import defer
13 from twisted.trial import unittest
14 from twisted.web2.http import splitHostPort
16 from AptPackages import AptPackages
# Name of the subdirectory (under the manager's cache directory) in which
# the per-mirror apt package cache files are stored (see MirrorManager.init).
aptpkg_dir='apt-packages'
class MirrorError(Exception):
    """Raised when a request cannot be satisfied for a mirror."""
24 """Manages all requests for mirror information.
26 @type cache_dir: L{twisted.python.filepath.FilePath}
27 @ivar cache_dir: the directory to use for storing all files
28 @type unload_delay: C{int}
29 @ivar unload_delay: the time to wait before unloading the apt cache
30 @type apt_caches: C{dictionary}
@ivar apt_caches: the available mirrors
34 def __init__(self, cache_dir, unload_delay):
35 self.cache_dir = cache_dir
36 self.unload_delay = unload_delay
39 def extractPath(self, url):
40 """Break the full URI down into the site, base directory and path.
42 Site is the host and port of the mirror. Base directory is the
43 directory to the mirror location (usually just '/debian'). Path is
44 the remaining path to get to the file.
46 E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
47 would return ('ftp.debian.org:80', '/debian',
48 '/dists/sid/binary-i386/Packages.bz2').
50 @param url: the URI of the file's location on the mirror
51 @rtype: (C{string}, C{string}, C{string})
52 @return: the site, base directory and path to the file
54 # Extract the host and port
55 parsed = urlparse(url)
56 host, port = splitHostPort(parsed[0], parsed[1])
57 site = host + ":" + str(port)
60 # Try to find the base directory (most can be found this way)
61 i = max(path.rfind('/dists/'), path.rfind('/pool/'))
66 # Uh oh, this is not good
67 log.msg("Couldn't find a good base directory for path: %s" % (site + path))
69 # Try to find an existing cache that starts with this one
70 # (fallback to using an empty base directory)
72 if site in self.apt_caches:
74 for base in self.apt_caches[site]:
76 for dirs in path.split('/'):
77 if base.startswith(base_match + '/' + dirs):
78 base_match += '/' + dirs
81 if len(base_match) > longest_match:
82 longest_match = len(base_match)
84 log.msg("Settled on baseDir: %s" % baseDir)
86 return site, baseDir, path
88 def init(self, site, baseDir):
89 """Make sure an L{AptPackages} exists for this mirror."""
90 if site not in self.apt_caches:
91 self.apt_caches[site] = {}
93 if baseDir not in self.apt_caches[site]:
94 site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
96 self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay)
98 def updatedFile(self, url, file_path):
99 """A file in the mirror has changed or been added.
101 @see: L{AptPackages.PackageFileList.update_file}
103 site, baseDir, path = self.extractPath(url)
104 self.init(site, baseDir)
105 self.apt_caches[site][baseDir].file_updated(path, file_path)
107 def findHash(self, url):
108 """Find the hash for a given url.
110 @param url: the URI of the file's location on the mirror
111 @rtype: L{twisted.internet.defer.Deferred}
112 @return: a deferred that will fire with the returned L{Hash.HashObject}
114 site, baseDir, path = self.extractPath(url)
115 if site in self.apt_caches and baseDir in self.apt_caches[site]:
116 return self.apt_caches[site][baseDir].findHash(path)
118 d.errback(MirrorError("Site Not Found"))
121 class TestMirrorManager(unittest.TestCase):
122 """Unit tests for the mirror manager."""
129 self.client = MirrorManager(FilePath('/tmp/.apt-dht'), 300)
131 def test_extractPath(self):
132 """Test extracting the site and base directory from various mirrors."""
133 site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
134 self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
135 self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
136 self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
138 site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
139 self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
140 self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
141 self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)
143 site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
144 self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
145 self.failUnless(baseDir == "", "no match: %s" % baseDir)
146 self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
148 def verifyHash(self, found_hash, path, true_hash):
149 self.failUnless(found_hash.hexexpected() == true_hash,
150 "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
    def test_findHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        # Find the largest index files that are for 'main'
        # (shells out to ls/grep over the local apt lists directory)
        self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
        self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')

        # Find the Release file corresponding to the found Packages file
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                # NOTE(review): the loop body that records the match
                # (presumably self.releaseFile = f, followed by a break) is
                # missing from this copy of the file -- confirm upstream.

        # Add all the found files to the mirror
        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.releaseFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.packagesFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.sourcesFile))

        # Fires once the final lookup below has completed
        lastDefer = defer.Deferred()

        # Lookup a Packages.bz2 file
        # (extract its SHA1 from the Release file's SHA1 section)
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/binary-i386/Packages.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Lookup the binary 'dpkg' package
        # (its SHA1 and Filename come from the Packages index)
        pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^SHA1:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')
        pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
                   os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^Filename:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        # Lookup the source 'dpkg' package
        # (directory, per-file hashes and file names come from the Sources index)
        src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -E "^Directory:" | head -n 1' +
                           ' | cut -d\ -f 2').read().rstrip('\n')
        src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                              '/var/lib/apt/lists/' + self.sourcesFile +
                              ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                              ' | cut -d\ -f 2').read().split('\n')[:-1]
        src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                             '/var/lib/apt/lists/' + self.sourcesFile +
                             ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                             ' | cut -d\ -f 4').read().split('\n')[:-1]

        # Verify each source file's hash in turn
        for i in range(len(src_hashes)):
            src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])

        # Lookup a Sources.bz2 file
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/source/Sources.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        d.addBoth(lastDefer.callback)
        # NOTE(review): a trailing 'return lastDefer' (so trial waits on the
        # deferred chain) appears to be missing from this copy -- confirm.
233 for p in self.pending_calls: