2 """Manage the multiple mirrors that may be requested.
4 @var aptpkg_dir: the name of the directory to use for mirror files
7 from urlparse import urlparse
10 from twisted.python import log
11 from twisted.python.filepath import FilePath
12 from twisted.internet import defer
13 from twisted.trial import unittest
14 from twisted.web2.http import splitHostPort
16 from AptPackages import AptPackages
# Name of the subdirectory (under the cache directory) where per-mirror
# apt package caches are stored; see MirrorManager.init below.
aptpkg_dir='apt-packages'
class MirrorError(Exception):
    """Raised when a mirror request cannot be satisfied."""
24 """Manages all requests for mirror information.
26 @type cache_dir: L{twisted.python.filepath.FilePath}
27 @ivar cache_dir: the directory to use for storing all files
28 @type unload_delay: C{int}
29 @ivar unload_delay: the time to wait before unloading the apt cache
30 @type apt_caches: C{dictionary}
@ivar apt_caches: the available mirrors
34 def __init__(self, cache_dir, unload_delay):
35 self.cache_dir = cache_dir
36 self.unload_delay = unload_delay
39 def extractPath(self, url):
40 """Break the full URI down into the site, base directory and path.
42 Site is the host and port of the mirror. Base directory is the
43 directory to the mirror location (usually just '/debian'). Path is
44 the remaining path to get to the file.
46 E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
47 would return ('ftp.debian.org:80', '/debian',
48 '/dists/sid/binary-i386/Packages.bz2').
50 @param url: the URI of the file's location on the mirror
51 @rtype: (C{string}, C{string}, C{string})
52 @return: the site, base directory and path to the file
54 # Extract the host and port
55 parsed = urlparse(url)
56 host, port = splitHostPort(parsed[0], parsed[1])
57 site = host + ":" + str(port)
60 # Try to find the base directory (most can be found this way)
61 i = max(path.rfind('/dists/'), path.rfind('/pool/'))
66 # Uh oh, this is not good
67 log.msg("Couldn't find a good base directory for path: %s" % (site + path))
69 # Try to find an existing cache that starts with this one
70 # (fallback to using an empty base directory)
72 if site in self.apt_caches:
74 for base in self.apt_caches[site]:
76 for dirs in path.split('/'):
77 if base.startswith(base_match + '/' + dirs):
78 base_match += '/' + dirs
81 if len(base_match) > longest_match:
82 longest_match = len(base_match)
84 log.msg("Settled on baseDir: %s" % baseDir)
86 return site, baseDir, path
88 def init(self, site, baseDir):
89 """Make sure an L{AptPackages} exists for this mirror."""
90 if site not in self.apt_caches:
91 self.apt_caches[site] = {}
93 if baseDir not in self.apt_caches[site]:
94 site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
96 self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay)
98 def updatedFile(self, url, file_path):
99 """A file in the mirror has changed or been added.
101 @see: L{AptPackages.PackageFileList.update_file}
103 site, baseDir, path = self.extractPath(url)
104 self.init(site, baseDir)
105 self.apt_caches[site][baseDir].file_updated(path, file_path)
107 def findHash(self, url):
108 """Find the hash for a given url.
110 @param url: the URI of the file's location on the mirror
111 @rtype: L{twisted.internet.defer.Deferred}
112 @return: a deferred that will fire with the returned L{Hash.HashObject}
114 site, baseDir, path = self.extractPath(url)
115 if site in self.apt_caches and baseDir in self.apt_caches[site]:
116 return self.apt_caches[site][baseDir].findHash(path)
118 d.errback(MirrorError("Site Not Found"))
        # Shut down every active AptPackages cache and forget it,
        # emptying self.apt_caches.
        # NOTE(review): entries are deleted while iterating; this is only
        # safe because Python 2's .keys() returns a list copy — under
        # Python 3 the live dict view would raise RuntimeError.  Confirm
        # the target interpreter version.
        for site in self.apt_caches.keys():
            for baseDir in self.apt_caches[site].keys():
                # Let the cache flush/close its resources before dropping it.
                self.apt_caches[site][baseDir].cleanup()
                del self.apt_caches[site][baseDir]
            del self.apt_caches[site]
class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager."""

        # NOTE(review): the setUp() header is elided from this view of the
        # file; this line creates the MirrorManager under test with a
        # temporary cache directory and a 300s unload delay.
        self.client = MirrorManager(FilePath('/tmp/.apt-p2p'), 300)
138 def test_extractPath(self):
139 """Test extracting the site and base directory from various mirrors."""
140 site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
141 self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
142 self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
143 self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
145 site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
146 self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
147 self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
148 self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)
150 site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
151 self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
152 self.failUnless(baseDir == "", "no match: %s" % baseDir)
153 self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
155 def verifyHash(self, found_hash, path, true_hash):
156 self.failUnless(found_hash.hexexpected() == true_hash,
157 "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
    def test_findHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        # NOTE(review): this test scrapes the host's real apt metadata in
        # /var/lib/apt/lists via shell pipelines (ls/grep/cut), so it only
        # runs meaningfully on a Debian-like system with populated lists.
        # Find the largest index files that are for 'main'
        self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
        self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')

        # Find the Release file corresponding to the found Packages file
        # (.next() on the os.walk generator is the Python 2 idiom)
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                # NOTE(review): the assignment of self.releaseFile (from f)
                # is elided from this view of the file — confirm against
                # the full source.

        # Add all the found files to the mirror
        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.releaseFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.packagesFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.sourcesFile))

        # Deferred fired by the final lookup below, once all have been issued.
        lastDefer = defer.Deferred()

        # Lookup a Packages.bz2 file
        # (its SHA1 is scraped from the Release file's checksum section)
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/binary-i386/Packages.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Lookup the binary 'dpkg' package
        # (SHA1 and Filename are scraped from the package's Packages stanza)
        pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^SHA1:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')
        pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
                   os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^Filename:" | head -n 1' +
                            ' | cut -d\ -f 2').read().rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        # Lookup the source 'dpkg' package
        # (directory, per-file hashes and names come from the Sources stanza)
        src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -E "^Directory:" | head -n 1' +
                           ' | cut -d\ -f 2').read().rstrip('\n')
        src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                              '/var/lib/apt/lists/' + self.sourcesFile +
                              ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                              ' | cut -d\ -f 2').read().split('\n')[:-1]
        src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                             '/var/lib/apt/lists/' + self.sourcesFile +
                             ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                             ' | cut -d\ -f 4').read().split('\n')[:-1]

        # One hash lookup per file listed in the source package's Files list
        for i in range(len(src_hashes)):
            src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])

        # Lookup a Sources.bz2 file
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/source/Sources.bz2$"'
                            ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Fire lastDefer whether the final lookup succeeds or fails.
        # NOTE(review): the `return lastDefer` that twisted.trial needs in
        # order to wait on these deferreds appears to be elided from this
        # view — confirm against the full source.
        d.addBoth(lastDefer.callback)
        # NOTE(review): the tearDown() header and the body of this loop
        # (presumably cancelling each pending delayed call) are elided from
        # this view of the file — confirm against the full source.
        for p in self.pending_calls:
        # Shut down the MirrorManager's caches created in setUp().
        self.client.cleanup()