Use the apt_p2p_conf config import rather than passing parameters around.
[quix0rs-apt-p2p.git] / apt_p2p / MirrorManager.py
1
2 """Manage the multiple mirrors that may be requested.
3
4 @var aptpkg_dir: the name of the directory to use for mirror files
5 """
6
7 from urlparse import urlparse
8 import os
9
10 from twisted.python import log
11 from twisted.python.filepath import FilePath
12 from twisted.internet import defer
13 from twisted.trial import unittest
14 from twisted.web2.http import splitHostPort
15
16 from AptPackages import AptPackages
17
# Name of the sub-directory (below the cache directory) that holds mirror files.
aptpkg_dir = 'apt-packages'
19
class MirrorError(Exception):
    """Signals a problem with a mirror.

    Raised (or delivered through a failed Deferred) when a request refers
    to a mirror that cannot be used.
    """
22
class MirrorManager:
    """Manages all requests for mirror information.
    
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type apt_caches: C{dictionary}
    @ivar apt_caches: the available mirrors, keyed first by site and then
        by base directory, with an L{AptPackages} as the final value
    """
    
    def __init__(self, cache_dir):
        """Initialize the manager with no known mirrors.
        
        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: the directory to use for storing all files
        """
        self.cache_dir = cache_dir
        self.apt_caches = {}
    
    def extractPath(self, url):
        """Break the full URI down into the site, base directory and path.
        
        Site is the host and port of the mirror. Base directory is the
        directory to the mirror location (usually just '/debian'). Path is
        the remaining path to get to the file.
        
        E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
        would return ('ftp.debian.org:80', '/debian', 
        '/dists/sid/binary-i386/Packages.bz2').
        
        If the path contains neither '/dists/' nor '/pool/', the base
        directories already known for the site are consulted instead (see
        L{_findKnownBase}), falling back to an empty base directory.
        
        @param url: the URI of the file's location on the mirror
        @rtype: (C{string}, C{string}, C{string})
        @return: the site, base directory and path to the file
        """
        # Extract the host and port
        parsed = urlparse(url)
        host, port = splitHostPort(parsed[0], parsed[1])
        site = host + ":" + str(port)
        path = parsed[2]

        # Try to find the base directory (most can be found this way)
        i = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if i >= 0:
            return site, path[:i], path[i:]

        # Uh oh, this is not good
        log.msg("Couldn't find a good base directory for path: %s" % (site + path))
        
        # Try to find an existing cache that starts with this one
        # (fallback to using an empty base directory)
        baseDir = self._findKnownBase(site, path)
        log.msg("Settled on baseDir: %s" % baseDir)
        
        # Keep the same contract as the normal case: the returned path is
        # what remains after the base directory.
        if baseDir and path.startswith(baseDir):
            path = path[len(baseDir):]
        
        return site, baseDir, path
        
    def _findKnownBase(self, site, path):
        """Find the longest leading directory shared with a known base directory.
        
        The path and every base directory already recorded for the site are
        compared one whole directory component at a time, so '/deb/x' does
        not match a base of '/debian'.
        
        @param site: the host and port of the mirror
        @param path: the full path of the file on the mirror
        @rtype: C{string}
        @return: the longest shared leading directory, or the empty string
            if the site is unknown or nothing matches
        """
        best = ''
        path_parts = path.split('/')
        for base in self.apt_caches.get(site, ()):
            matched = []
            # Both lists start with '' for absolute paths, so joining the
            # matched components restores the leading '/'.
            for base_part, path_part in zip(base.split('/'), path_parts):
                if base_part != path_part:
                    break
                matched.append(path_part)
            candidate = '/'.join(matched)
            if len(candidate) > len(best):
                best = candidate
        return best
        
    def init(self, site, baseDir):
        """Make sure an L{AptPackages} exists for this mirror.
        
        @param site: the host and port of the mirror
        @param baseDir: the base directory of the mirror on that site
        """
        site_caches = self.apt_caches.setdefault(site, {})
            
        if baseDir not in site_caches:
            site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
            # Actually create the directory (the call used to be missing its
            # parentheses); skip it when it already exists so that a reused
            # cache does not raise.
            if not site_cache.exists():
                site_cache.makedirs()
            site_caches[baseDir] = AptPackages(site_cache)
    
    def updatedFile(self, url, file_path):
        """A file in the mirror has changed or been added.
        
        @param url: the URI of the file's location on the mirror
        @param file_path: the location of the new or changed file
        @see: L{AptPackages.PackageFileList.update_file}
        """
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, file_path)

    def findHash(self, url):
        """Find the hash for a given url.

        @param url: the URI of the file's location on the mirror
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that will fire with the returned L{Hash.HashObject},
            or fail with a L{MirrorError} if the mirror is not known
        """
        site, baseDir, path = self.extractPath(url)
        if site in self.apt_caches and baseDir in self.apt_caches[site]:
            return self.apt_caches[site][baseDir].findHash(path)
        return defer.fail(MirrorError("Site Not Found"))
    
    def cleanup(self):
        """Clean up and forget every known mirror cache."""
        # Iterate over snapshots of the keys, since entries are removed
        # from the dictionaries while looping.
        for site in list(self.apt_caches.keys()):
            for baseDir in list(self.apt_caches[site].keys()):
                self.apt_caches[site][baseDir].cleanup()
                del self.apt_caches[site][baseDir]
            del self.apt_caches[site]
124     
class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager.
    
    L{test_findHash} relies on the local apt lists in /var/lib/apt/lists
    and on the usual shell tools (ls, grep, head, tail, cut).
    """
    
    timeout = 20
    pending_calls = []
    client = None
    
    def setUp(self):
        """Create a fresh manager for every test."""
        self.client = MirrorManager(FilePath('/tmp/.apt-p2p'))
        
    def test_extractPath(self):
        """Test extracting the site and base directory from various mirrors."""
        cases = [
            ('http://ftp.us.debian.org/debian/dists/unstable/Release',
             "ftp.us.debian.org:80", "/debian", "/dists/unstable/Release"),
            ('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz',
             "ftp.us.debian.org:16999", "/debian", "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz"),
            ('http://debian.camrdale.org/dists/unstable/Release',
             "debian.camrdale.org:80", "", "/dists/unstable/Release"),
        ]
        for url, expected_site, expected_base, expected_path in cases:
            site, baseDir, path = self.client.extractPath(url)
            self.failUnless(site == expected_site, "no match: %s" % site)
            self.failUnless(baseDir == expected_base, "no match: %s" % baseDir)
            self.failUnless(path == expected_path, "no match: %s" % path)

    def verifyHash(self, found_hash, path, true_hash):
        """Check that the hash found for a path is the expected one.
        
        @param found_hash: the hash object that the lookup fired with
        @param path: the URI that was looked up (only used in the message)
        @param true_hash: the expected hash, as a hexadecimal string
        """
        self.failUnless(found_hash.hexexpected() == true_hash, 
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def _shell(self, command):
        """Run a shell command and return everything it wrote to stdout."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            # Make sure the pipe is not left open
            pipe.close()

    def test_findHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        lists_dir = '/var/lib/apt/lists/'
        
        # Find the largest index files that are for 'main'
        self.packagesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').rstrip('\n')
        self.sourcesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').rstrip('\n')
        
        # Find the Release file corresponding to the found Packages file
        # (only the files directly inside the lists directory are checked)
        self.releaseFile = None
        for _root, _dirs, files in os.walk('/var/lib/apt/lists'):
            for f in files:
                if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                    self.releaseFile = f
                    break
            break
        
        # Without the local apt lists there is nothing to compare against
        if not (self.packagesFile and self.sourcesFile and self.releaseFile):
            raise unittest.SkipTest("no usable apt lists found in /var/lib/apt/lists")
        
        # The URI of the mirror's base directory, including the trailing '/'
        mirror_url = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/')
        # The URI of the directory that contains the Release file
        release_url = 'http://' + self.releaseFile.replace('_','/')[:-7]
        
        # Add all the found files to the mirror
        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'), 
                                FilePath(lists_dir + self.releaseFile))
        self.client.updatedFile(mirror_url +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'), 
                                FilePath(lists_dir + self.packagesFile))
        self.client.updatedFile(mirror_url +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'), 
                                FilePath(lists_dir + self.sourcesFile))

        # Every lookup is collected so that the test waits for all of them
        lookups = []
        
        # Lookup a Packages.bz2 file
        idx_hash = self._shell('grep -A 3000 -E "^SHA1:" ' + 
                               lists_dir + self.releaseFile + 
                               ' | grep -E " main/binary-i386/Packages.bz2$"'
                               ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')
        idx_path = release_url + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        # Lookup the binary 'dpkg' package
        pkg_hash = self._shell('grep -A 30 -E "^Package: dpkg$" ' + 
                               lists_dir + self.packagesFile + 
                               ' | grep -E "^SHA1:" | head -n 1' + 
                               ' | cut -d\\  -f 2').rstrip('\n')
        pkg_path = mirror_url + \
                   self._shell('grep -A 30 -E "^Package: dpkg$" ' + 
                               lists_dir + self.packagesFile + 
                               ' | grep -E "^Filename:" | head -n 1' + 
                               ' | cut -d\\  -f 2').rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
        lookups.append(d)

        # Lookup the source 'dpkg' package
        src_dir = self._shell('grep -A 30 -E "^Package: dpkg$" ' + 
                              lists_dir + self.sourcesFile + 
                              ' | grep -E "^Directory:" | head -n 1' + 
                              ' | cut -d\\  -f 2').rstrip('\n')
        src_hashes = self._shell('grep -A 20 -E "^Package: dpkg$" ' + 
                                 lists_dir + self.sourcesFile + 
                                 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + 
                                 ' | cut -d\\  -f 2').split('\n')[:-1]
        src_paths = self._shell('grep -A 20 -E "^Package: dpkg$" ' + 
                                lists_dir + self.sourcesFile + 
                                ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + 
                                ' | cut -d\\  -f 4').split('\n')[:-1]

        for src_hash, src_name in zip(src_hashes, src_paths):
            src_path = mirror_url + src_dir + '/' + src_name
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hash)
            lookups.append(d)
            
        # Lookup a Sources.bz2 file
        idx_hash = self._shell('grep -A 3000 -E "^SHA1:" ' + 
                               lists_dir + self.releaseFile + 
                               ' | grep -E " main/source/Sources.bz2$"'
                               ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')
        idx_path = release_url + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        def unwrapFirstError(failure):
            # Report the original failure rather than the FirstError wrapper
            failure.trap(defer.FirstError)
            return failure.value.subFailure

        # Fail as soon as any lookup fails, and succeed only once all of
        # them have been verified.
        allDone = defer.DeferredList(lookups, fireOnOneErrback=True, consumeErrors=True)
        allDone.addErrback(unwrapFirstError)
        return allDone

    def tearDown(self):
        """Cancel any calls that are still pending and release the manager."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.client.cleanup()
        self.client = None
242