2 """Manage the multiple mirrors that may be requested.
4 @var aptpkg_dir: the name of the directory to use for mirror files
import os
from urlparse import urlparse

from twisted.python import log
from twisted.python.filepath import FilePath
from twisted.internet import defer
from twisted.trial import unittest
from twisted.web2.http import splitHostPort

from AptPackages import AptPackages
# Name of the subdirectory (under the manager's cache_dir) in which the
# per-mirror AptPackages caches are stored; see MirrorManager.init().
aptpkg_dir='apt-packages'
class MirrorError(Exception):
    """Raised when a request about a mirror cannot be satisfied."""
24 """Manages all requests for mirror information.
26 @type cache_dir: L{twisted.python.filepath.FilePath}
27 @ivar cache_dir: the directory to use for storing all files
28 @type apt_caches: C{dictionary}
29 @ivar apt_caches: the available mirrors
32 def __init__(self, cache_dir):
33 self.cache_dir = cache_dir
36 def extractPath(self, url):
37 """Break the full URI down into the site, base directory and path.
39 Site is the host and port of the mirror. Base directory is the
40 directory to the mirror location (usually just '/debian'). Path is
41 the remaining path to get to the file.
43 E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
44 would return ('ftp.debian.org:80', '/debian',
45 '/dists/sid/binary-i386/Packages.bz2').
47 @param url: the URI of the file's location on the mirror
48 @rtype: (C{string}, C{string}, C{string})
49 @return: the site, base directory and path to the file
51 # Extract the host and port
52 parsed = urlparse(url)
53 host, port = splitHostPort(parsed[0], parsed[1])
54 site = host + ":" + str(port)
57 # Try to find the base directory (most can be found this way)
58 i = max(path.rfind('/dists/'), path.rfind('/pool/'))
63 # Uh oh, this is not good
64 log.msg("Couldn't find a good base directory for path: %s" % (site + path))
66 # Try to find an existing cache that starts with this one
67 # (fallback to using an empty base directory)
69 if site in self.apt_caches:
71 for base in self.apt_caches[site]:
73 for dirs in path.split('/'):
74 if base.startswith(base_match + '/' + dirs):
75 base_match += '/' + dirs
78 if len(base_match) > longest_match:
79 longest_match = len(base_match)
81 log.msg("Settled on baseDir: %s" % baseDir)
83 return site, baseDir, path
85 def init(self, site, baseDir):
86 """Make sure an L{AptPackages} exists for this mirror."""
87 if site not in self.apt_caches:
88 self.apt_caches[site] = {}
90 if baseDir not in self.apt_caches[site]:
91 site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
93 self.apt_caches[site][baseDir] = AptPackages(site_cache)
95 def updatedFile(self, url, file_path):
96 """A file in the mirror has changed or been added.
98 @see: L{AptPackages.PackageFileList.update_file}
100 site, baseDir, path = self.extractPath(url)
101 self.init(site, baseDir)
102 self.apt_caches[site][baseDir].file_updated(path, file_path)
104 def findHash(self, url):
105 """Find the hash for a given url.
107 @param url: the URI of the file's location on the mirror
108 @rtype: L{twisted.internet.defer.Deferred}
109 @return: a deferred that will fire with the returned L{Hash.HashObject}
111 site, baseDir, path = self.extractPath(url)
112 self.init(site, baseDir)
113 if site in self.apt_caches and baseDir in self.apt_caches[site]:
114 return self.apt_caches[site][baseDir].findHash(path)
115 return defer.fail(MirrorError("Site Not Found"))
118 for site in self.apt_caches.keys():
119 for baseDir in self.apt_caches[site].keys():
120 self.apt_caches[site][baseDir].cleanup()
121 del self.apt_caches[site][baseDir]
122 del self.apt_caches[site]
# NOTE(review): these are integration-style tests — test_findHash reads the
# host's index files under /var/lib/apt/lists, so a Debian-like system is
# required for them to do real work.
class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager."""
132 self.client = MirrorManager(FilePath('/tmp/.apt-p2p'))
134 def test_extractPath(self):
135 """Test extracting the site and base directory from various mirrors."""
136 site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
137 self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
138 self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
139 self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
141 site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
142 self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
143 self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
144 self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)
146 site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
147 self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
148 self.failUnless(baseDir == "", "no match: %s" % baseDir)
149 self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
151 def verifyHash(self, found_hash, path, true_hash):
152 self.failUnless(found_hash.hexexpected() == true_hash,
153 "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
155 def test_findHash(self):
156 """Tests finding the hash of an index file, binary package, source package, and another index file."""
157 # Find the largest index files that are for 'main'
158 self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
159 self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
161 # Find the Release file corresponding to the found Packages file
162 for f in os.walk('/var/lib/apt/lists').next()[2]:
163 if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
167 # Add all the found files to the mirror
168 self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
169 FilePath('/var/lib/apt/lists/' + self.releaseFile))
170 self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
171 self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
172 FilePath('/var/lib/apt/lists/' + self.packagesFile))
173 self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
174 self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
175 FilePath('/var/lib/apt/lists/' + self.sourcesFile))
177 lastDefer = defer.Deferred()
179 # Lookup a Packages.bz2 file
180 idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
181 '/var/lib/apt/lists/' + self.releaseFile +
182 ' | grep -E " main/binary-i386/Packages.bz2$"'
183 ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
184 idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
186 d = self.client.findHash(idx_path)
187 d.addCallback(self.verifyHash, idx_path, idx_hash)
189 # Lookup the binary 'dpkg' package
190 pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
191 '/var/lib/apt/lists/' + self.packagesFile +
192 ' | grep -E "^SHA1:" | head -n 1' +
193 ' | cut -d\ -f 2').read().rstrip('\n')
194 pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
195 os.popen('grep -A 30 -E "^Package: dpkg$" ' +
196 '/var/lib/apt/lists/' + self.packagesFile +
197 ' | grep -E "^Filename:" | head -n 1' +
198 ' | cut -d\ -f 2').read().rstrip('\n')
200 d = self.client.findHash(pkg_path)
201 d.addCallback(self.verifyHash, pkg_path, pkg_hash)
203 # Lookup the source 'dpkg' package
204 src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
205 '/var/lib/apt/lists/' + self.sourcesFile +
206 ' | grep -E "^Directory:" | head -n 1' +
207 ' | cut -d\ -f 2').read().rstrip('\n')
208 src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
209 '/var/lib/apt/lists/' + self.sourcesFile +
210 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
211 ' | cut -d\ -f 2').read().split('\n')[:-1]
212 src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
213 '/var/lib/apt/lists/' + self.sourcesFile +
214 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
215 ' | cut -d\ -f 4').read().split('\n')[:-1]
217 for i in range(len(src_hashes)):
218 src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
219 d = self.client.findHash(src_path)
220 d.addCallback(self.verifyHash, src_path, src_hashes[i])
222 # Lookup a Sources.bz2 file
223 idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
224 '/var/lib/apt/lists/' + self.releaseFile +
225 ' | grep -E " main/source/Sources.bz2$"'
226 ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
227 idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'
229 d = self.client.findHash(idx_path)
230 d.addCallback(self.verifyHash, idx_path, idx_hash)
232 d.addBoth(lastDefer.callback)
236 for p in self.pending_calls:
239 self.client.cleanup()