"""Manage the multiple mirrors that may be requested.

@var aptpkg_dir: the name of the directory to use for mirror files
"""
import os
from urlparse import urlparse

from twisted.python import log
from twisted.python.filepath import FilePath
from twisted.internet import defer
from twisted.trial import unittest
from twisted.web2.http import splitHostPort

from AptPackages import AptPackages
# Name of the subdirectory (under the cache directory) used for mirror files.
aptpkg_dir = 'apt-packages'
class MirrorError(Exception):
    """Exception raised when there's a problem with the mirror."""
    """Manages all requests for mirror information.

    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type apt_caches: C{dictionary}
    @ivar apt_caches: the available mirrors
    """
32 def __init__(self, cache_dir):
33 self.cache_dir = cache_dir
36 def extractPath(self, url):
37 """Break the full URI down into the site, base directory and path.
39 Site is the host and port of the mirror. Base directory is the
40 directory to the mirror location (usually just '/debian'). Path is
41 the remaining path to get to the file.
43 E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
44 would return ('ftp.debian.org:80', '/debian',
45 '/dists/sid/binary-i386/Packages.bz2').
47 @param url: the URI of the file's location on the mirror
48 @rtype: (C{string}, C{string}, C{string})
49 @return: the site, base directory and path to the file
51 # Extract the host and port
52 parsed = urlparse(url)
53 host, port = splitHostPort(parsed[0], parsed[1])
54 site = host + ":" + str(port)
57 # Try to find the base directory (most can be found this way)
58 i = max(path.rfind('/dists/'), path.rfind('/pool/'))
63 # Uh oh, this is not good
64 log.msg("Couldn't find a good base directory for path: %s" % (site + path))
66 # Try to find an existing cache that starts with this one
67 # (fallback to using an empty base directory)
69 if site in self.apt_caches:
71 for base in self.apt_caches[site]:
73 for dirs in path.split('/'):
74 if base.startswith(base_match + '/' + dirs):
75 base_match += '/' + dirs
78 if len(base_match) > longest_match:
79 longest_match = len(base_match)
81 log.msg("Settled on baseDir: %s" % baseDir)
83 return site, baseDir, path
85 def init(self, site, baseDir):
86 """Make sure an L{AptPackages} exists for this mirror."""
87 if site not in self.apt_caches:
88 self.apt_caches[site] = {}
90 if baseDir not in self.apt_caches[site]:
91 site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
93 self.apt_caches[site][baseDir] = AptPackages(site_cache)
95 def updatedFile(self, url, file_path):
96 """A file in the mirror has changed or been added.
98 @see: L{AptPackages.PackageFileList.update_file}
100 site, baseDir, path = self.extractPath(url)
101 self.init(site, baseDir)
102 self.apt_caches[site][baseDir].file_updated(path, file_path)
104 def findHash(self, url):
105 """Find the hash for a given url.
107 @param url: the URI of the file's location on the mirror
108 @rtype: L{twisted.internet.defer.Deferred}
109 @return: a deferred that will fire with the returned L{Hash.HashObject}
111 site, baseDir, path = self.extractPath(url)
112 if site in self.apt_caches and baseDir in self.apt_caches[site]:
113 return self.apt_caches[site][baseDir].findHash(path)
114 return defer.fail(MirrorError("Site Not Found"))
117 for site in self.apt_caches.keys():
118 for baseDir in self.apt_caches[site].keys():
119 self.apt_caches[site][baseDir].cleanup()
120 del self.apt_caches[site][baseDir]
121 del self.apt_caches[site]
123 class TestMirrorManager(unittest.TestCase):
124 """Unit tests for the mirror manager."""
131 self.client = MirrorManager(FilePath('/tmp/.apt-p2p'))
133 def test_extractPath(self):
134 """Test extracting the site and base directory from various mirrors."""
135 site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
136 self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
137 self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
138 self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
140 site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
141 self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
142 self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
143 self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)
145 site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
146 self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
147 self.failUnless(baseDir == "", "no match: %s" % baseDir)
148 self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
150 def verifyHash(self, found_hash, path, true_hash):
151 self.failUnless(found_hash.hexexpected() == true_hash,
152 "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
154 def test_findHash(self):
155 """Tests finding the hash of an index file, binary package, source package, and another index file."""
156 # Find the largest index files that are for 'main'
157 self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
158 self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
160 # Find the Release file corresponding to the found Packages file
161 for f in os.walk('/var/lib/apt/lists').next()[2]:
162 if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
166 # Add all the found files to the mirror
167 self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
168 FilePath('/var/lib/apt/lists/' + self.releaseFile))
169 self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
170 self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
171 FilePath('/var/lib/apt/lists/' + self.packagesFile))
172 self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
173 self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
174 FilePath('/var/lib/apt/lists/' + self.sourcesFile))
176 lastDefer = defer.Deferred()
178 # Lookup a Packages.bz2 file
179 idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
180 '/var/lib/apt/lists/' + self.releaseFile +
181 ' | grep -E " main/binary-i386/Packages.bz2$"'
182 ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
183 idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
185 d = self.client.findHash(idx_path)
186 d.addCallback(self.verifyHash, idx_path, idx_hash)
188 # Lookup the binary 'dpkg' package
189 pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
190 '/var/lib/apt/lists/' + self.packagesFile +
191 ' | grep -E "^SHA1:" | head -n 1' +
192 ' | cut -d\ -f 2').read().rstrip('\n')
193 pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
194 os.popen('grep -A 30 -E "^Package: dpkg$" ' +
195 '/var/lib/apt/lists/' + self.packagesFile +
196 ' | grep -E "^Filename:" | head -n 1' +
197 ' | cut -d\ -f 2').read().rstrip('\n')
199 d = self.client.findHash(pkg_path)
200 d.addCallback(self.verifyHash, pkg_path, pkg_hash)
202 # Lookup the source 'dpkg' package
203 src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
204 '/var/lib/apt/lists/' + self.sourcesFile +
205 ' | grep -E "^Directory:" | head -n 1' +
206 ' | cut -d\ -f 2').read().rstrip('\n')
207 src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
208 '/var/lib/apt/lists/' + self.sourcesFile +
209 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
210 ' | cut -d\ -f 2').read().split('\n')[:-1]
211 src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
212 '/var/lib/apt/lists/' + self.sourcesFile +
213 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
214 ' | cut -d\ -f 4').read().split('\n')[:-1]
216 for i in range(len(src_hashes)):
217 src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
218 d = self.client.findHash(src_path)
219 d.addCallback(self.verifyHash, src_path, src_hashes[i])
221 # Lookup a Sources.bz2 file
222 idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
223 '/var/lib/apt/lists/' + self.releaseFile +
224 ' | grep -E " main/source/Sources.bz2$"'
225 ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
226 idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'
228 d = self.client.findHash(idx_path)
229 d.addCallback(self.verifyHash, idx_path, idx_hash)
231 d.addBoth(lastDefer.callback)
235 for p in self.pending_calls:
238 self.client.cleanup()