2 """Manage the multiple mirrors that may be requested.
4 @var aptpkg_dir: the name of the directory to use for mirror files
7 from urlparse import urlparse
10 from twisted.python import log
11 from twisted.python.filepath import FilePath
12 from twisted.internet import defer
13 from twisted.trial import unittest
14 from twisted.web2.http import splitHostPort
16 from AptPackages import AptPackages
# Subdirectory (under the cache directory) that holds the per-mirror
# package-cache files used by L{AptPackages}
aptpkg_dir = 'apt-packages'
class MirrorError(Exception):
    """Exception raised when there's a problem with the mirror."""
24 """Manages all requests for mirror information.
26 @type cache_dir: L{twisted.python.filepath.FilePath}
27 @ivar cache_dir: the directory to use for storing all files
28 @type apt_caches: C{dictionary}
29 @ivar apt_caches: the avaliable mirrors
32 def __init__(self, cache_dir):
33 self.cache_dir = cache_dir
36 def extractPath(self, url):
37 """Break the full URI down into the site, base directory and path.
39 Site is the host and port of the mirror. Base directory is the
40 directory to the mirror location (usually just '/debian'). Path is
41 the remaining path to get to the file.
43 E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
44 would return ('ftp.debian.org:80', '/debian',
45 '/dists/sid/binary-i386/Packages.bz2').
47 @param url: the URI of the file's location on the mirror
48 @rtype: (C{string}, C{string}, C{string})
49 @return: the site, base directory and path to the file
51 # Extract the host and port
52 parsed = urlparse(url)
53 host, port = splitHostPort(parsed[0], parsed[1])
54 site = host + ":" + str(port)
57 # Try to find the base directory (most can be found this way)
58 i = max(path.rfind('/dists/'), path.rfind('/pool/'))
63 # Uh oh, this is not good
64 log.msg("Couldn't find a good base directory for path: %s" % (site + path))
66 # Try to find an existing cache that starts with this one
67 # (fallback to using an empty base directory)
69 if site in self.apt_caches:
71 for base in self.apt_caches[site]:
73 for dirs in path.split('/'):
74 if base.startswith(base_match + '/' + dirs):
75 base_match += '/' + dirs
78 if len(base_match) > longest_match:
79 longest_match = len(base_match)
81 log.msg("Settled on baseDir: %s" % baseDir)
83 return site, baseDir, path
85 def init(self, site, baseDir):
86 """Make sure an L{AptPackages} exists for this mirror."""
87 if site not in self.apt_caches:
88 self.apt_caches[site] = {}
90 if baseDir not in self.apt_caches[site]:
91 site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
93 self.apt_caches[site][baseDir] = AptPackages(site_cache)
95 def updatedFile(self, url, file_path):
96 """A file in the mirror has changed or been added.
98 @see: L{AptPackages.PackageFileList.update_file}
100 site, baseDir, path = self.extractPath(url)
101 self.init(site, baseDir)
102 self.apt_caches[site][baseDir].file_updated(path, file_path)
104 def findHash(self, url):
105 """Find the hash for a given url.
107 @param url: the URI of the file's location on the mirror
108 @rtype: L{twisted.internet.defer.Deferred}
109 @return: a deferred that will fire with the returned L{Hash.HashObject}
111 site, baseDir, path = self.extractPath(url)
112 if site in self.apt_caches and baseDir in self.apt_caches[site]:
113 return self.apt_caches[site][baseDir].findHash(path)
115 d.errback(MirrorError("Site Not Found"))
119 for site in self.apt_caches.keys():
120 for baseDir in self.apt_caches[site].keys():
121 self.apt_caches[site][baseDir].cleanup()
122 del self.apt_caches[site][baseDir]
123 del self.apt_caches[site]
class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager."""

    # Maximum seconds twisted.trial allows each test to run
    timeout = 20
    # Delayed calls that tearDown must cancel
    pending_calls = []
    client = None

    def setUp(self):
        self.client = MirrorManager(FilePath('/tmp/.apt-p2p'))

    def test_extractPath(self):
        """Test extracting the site and base directory from various mirrors."""
        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
        self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
        self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
        self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

    def verifyHash(self, found_hash, path, true_hash):
        # Callback helper: the hash the mirror manager found must equal the
        # one read directly from the apt index files
        self.failUnless(found_hash.hexexpected() == true_hash,
                        "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        # Find the largest index files that are for 'main'
        self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
        self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')

        # Find the Release file corresponding to the found Packages file
        self.releaseFile = ''
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        # Add all the found files to the mirror
        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.releaseFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.packagesFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.sourcesFile))

        lastDefer = defer.Deferred()

        # Lookup a Packages.bz2 file
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/binary-i386/Packages.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Lookup the binary 'dpkg' package
        pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^SHA1:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')
        pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
                   os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^Filename:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        # Lookup the source 'dpkg' package
        src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -E "^Directory:" | head -n 1' +
                           ' | cut -d\ -f 2').read().rstrip('\n')
        src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                              '/var/lib/apt/lists/' + self.sourcesFile +
                              ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                              ' | cut -d\ -f 2').read().split('\n')[:-1]
        src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                             '/var/lib/apt/lists/' + self.sourcesFile +
                             ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                             ' | cut -d\ -f 4').read().split('\n')[:-1]

        for i in range(len(src_hashes)):
            src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])

        # Lookup a Sources.bz2 file
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/source/Sources.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Fire the final deferred once the last lookup completes, so
        # twisted.trial waits for all the callbacks above
        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        for p in self.pending_calls:
            # Cancel any delayed call that has not fired yet
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()