Fix some documentation errors.
[quix0rs-apt-p2p.git] / apt_p2p / MirrorManager.py
1
2 """Manage the multiple mirrors that may be requested.
3
4 @var aptpkg_dir: the name of the directory to use for mirror files
5 """
6
7 from urlparse import urlparse
8 import os
9
10 from twisted.python import log
11 from twisted.python.filepath import FilePath
12 from twisted.internet import defer
13 from twisted.trial import unittest
14 from twisted.web2.http import splitHostPort
15
16 from AptPackages import AptPackages
17
# Name of the sub-directory of the cache directory that holds the mirror files.
aptpkg_dir = 'apt-packages'
19
class MirrorError(Exception):
    """Error signalling that a mirror request could not be satisfied."""
22
class MirrorManager:
    """Manages all requests for mirror information.

    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type apt_caches: C{dictionary}
    @ivar apt_caches: the available mirrors, keyed by site, with each value
        being a dictionary that maps a base directory on that site to the
        L{AptPackages} created for it
    """

    def __init__(self, cache_dir):
        """Initialize the manager with no known mirrors.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: the directory to use for storing all files
        """
        self.cache_dir = cache_dir
        self.apt_caches = {}

    def extractPath(self, url):
        """Break the full URI down into the site, base directory and path.

        Site is the host and port of the mirror. Base directory is the
        directory to the mirror location (usually just '/debian'). Path is
        the remaining path to get to the file.

        E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
        would return ('ftp.debian.org:80', '/debian',
        '/dists/sid/binary-i386/Packages.bz2').

        If the path contains neither '/dists/' nor '/pool/', the base
        directory is the longest leading run of directories shared with a
        base directory already known for the site (or '' if there is none).

        @param url: the URI of the file's location on the mirror
        @rtype: (C{string}, C{string}, C{string})
        @return: the site, base directory and path to the file
        """
        # Extract the host and port
        parsed = urlparse(url)
        host, port = splitHostPort(parsed[0], parsed[1])
        site = host + ":" + str(port)
        path = parsed[2]

        # Try to find the base directory (most can be found this way)
        i = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if i >= 0:
            baseDir = path[:i]
            path = path[i:]
        else:
            # Uh oh, this is not good
            log.msg("Couldn't find a good base directory for path: %s" % (site + path))

            # Try to find an existing cache that starts with this one
            # (fallback to using an empty base directory)
            baseDir = self._longestBaseMatch(site, path)

            # Keep the documented contract: path is what remains after the
            # base directory (the match is always a literal prefix of path).
            path = path[len(baseDir):]
            log.msg("Settled on baseDir: %s" % baseDir)

        return site, baseDir, path

    def _longestBaseMatch(self, site, path):
        """Find the longest directory prefix of path shared with a known base.

        Only whole directory components are compared, and the final
        component of the path (the file name) is never part of the match.

        @param site: the host and port of the mirror
        @param path: the absolute path of the file on the site
        @rtype: C{string}
        @return: the longest matching directory prefix of the path, or ''
            if the site is unknown or nothing matches
        """
        if not path.startswith('/'):
            return ''

        # Skip the empty string before the leading '/' (it made every
        # comparison after the first one fail) and the trailing file name.
        directories = path.split('/')[1:-1]

        longest_match = ''
        for base in self.apt_caches.get(site, ()):
            base_match = ''
            for directory in directories:
                candidate = base_match + '/' + directory
                if base == candidate or base.startswith(candidate + '/'):
                    base_match = candidate
                else:
                    break
            if len(base_match) > len(longest_match):
                longest_match = base_match
        return longest_match

    def init(self, site, baseDir):
        """Make sure an L{AptPackages} exists for this mirror.

        @param site: the host and port of the mirror
        @param baseDir: the base directory of the mirror on the site
        """
        if site not in self.apt_caches:
            self.apt_caches[site] = {}

        if baseDir not in self.apt_caches[site]:
            site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
            # The original only referenced the method without calling it.
            # Guard the call, as the directory may remain from an earlier run.
            if not site_cache.exists():
                site_cache.makedirs()
            self.apt_caches[site][baseDir] = AptPackages(site_cache)

    def updatedFile(self, url, file_path):
        """A file in the mirror has changed or been added.

        @param url: the URI of the file's location on the mirror
        @param file_path: the location of the downloaded copy of the file
        @see: L{AptPackages.PackageFileList.update_file}
        """
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, file_path)

    def findHash(self, url):
        """Find the hash for a given url.

        @param url: the URI of the file's location on the mirror
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that will fire with the returned L{Hash.HashObject}
        """
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        # Defensive check: init() above is expected to have created the entry.
        if site in self.apt_caches and baseDir in self.apt_caches[site]:
            return self.apt_caches[site][baseDir].findHash(path)
        return defer.fail(MirrorError("Site Not Found"))

    def cleanup(self):
        """Clean up every known mirror's L{AptPackages} and forget them all."""
        # Iterate over snapshots of the keys, since entries are deleted
        # from the dictionaries inside the loops.
        for site in list(self.apt_caches.keys()):
            for baseDir in list(self.apt_caches[site].keys()):
                self.apt_caches[site][baseDir].cleanup()
                del self.apt_caches[site][baseDir]
            del self.apt_caches[site]
123     
class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager.

    test_findHash reads the apt index files found in /var/lib/apt/lists
    on the machine running the tests.
    """

    # Per-test timeout used by trial, in seconds
    timeout = 20
    # Delayed calls that tearDown cancels if they are still active
    pending_calls = []
    # The MirrorManager under test, created in setUp
    client = None

    def setUp(self):
        """Create a fresh manager and pending call list for each test."""
        # Rebind per instance so tests never share the class-level list
        self.pending_calls = []
        self.client = MirrorManager(FilePath('/tmp/.apt-p2p'))

    def _shellOutput(self, command):
        """Run a shell command and return everything it wrote to stdout."""
        return os.popen(command).read()

    def _unwrapFirstError(self, failure):
        """Replace a DeferredList's FirstError with the failure it wraps.

        This lets a failed hash verification be reported as itself.
        """
        failure.trap(defer.FirstError)
        return failure.value.subFailure

    def test_extractPath(self):
        """Test extracting the site and base directory from various mirrors."""
        cases = [
            ('http://ftp.us.debian.org/debian/dists/unstable/Release',
             ("ftp.us.debian.org:80", "/debian", "/dists/unstable/Release")),
            ('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz',
             ("ftp.us.debian.org:16999", "/debian", "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz")),
            ('http://debian.camrdale.org/dists/unstable/Release',
             ("debian.camrdale.org:80", "", "/dists/unstable/Release")),
        ]
        for url, expected in cases:
            site, baseDir, path = self.client.extractPath(url)
            self.failUnless(site == expected[0], "no match: %s" % site)
            self.failUnless(baseDir == expected[1], "no match: %s" % baseDir)
            self.failUnless(path == expected[2], "no match: %s" % path)

    def verifyHash(self, found_hash, path, true_hash):
        """Check that a found hash has the expected hexadecimal value.

        @param found_hash: the result of the L{MirrorManager.findHash} lookup
        @param path: the URI that was looked up (only used in the message)
        @param true_hash: the expected hash, in hexadecimal
        """
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file.

        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that fires once every lookup has been verified,
            and fails as soon as any one of them fails
        """
        lists_dir = '/var/lib/apt/lists/'

        # Find the largest index files that are for 'main'
        self.packagesFile = self._shellOutput('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').rstrip('\n')
        self.sourcesFile = self._shellOutput('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').rstrip('\n')

        # Only the files directly inside the lists directory are of interest,
        # so stop after the first entry produced by the walk
        list_files = []
        for dirpath, dirnames, list_files in os.walk('/var/lib/apt/lists'):
            break

        # Find the Release file corresponding to the found Packages file
        self.releaseFile = None
        for f in list_files:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break
        self.failUnless(self.releaseFile,
                        "no Release file found in /var/lib/apt/lists/ for: %s" % self.packagesFile)

        # The mirror's URL, up to and including the '/' that precedes 'dists'
        mirror_url = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/')
        # The URL of the directory containing the Release file
        release_url = 'http://' + self.releaseFile.replace('_','/')[:-7]

        # Add all the found files to the mirror
        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
                                FilePath(lists_dir + self.releaseFile))
        self.client.updatedFile(mirror_url +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath(lists_dir + self.packagesFile))
        self.client.updatedFile(mirror_url +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath(lists_dir + self.sourcesFile))

        # Every lookup is collected so that all of them decide the result
        lookups = []

        # Lookup a Packages.bz2 file
        idx_hash = self._shellOutput('grep -A 3000 -E "^SHA1:" ' +
                                     lists_dir + self.releaseFile +
                                     ' | grep -E " main/binary-i386/Packages.bz2$"'
                                     ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')
        idx_path = release_url + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        # Lookup the binary 'dpkg' package
        pkg_hash = self._shellOutput('grep -A 30 -E "^Package: dpkg$" ' +
                                     lists_dir + self.packagesFile +
                                     ' | grep -E "^SHA1:" | head -n 1' +
                                     ' | cut -d\\  -f 2').rstrip('\n')
        pkg_path = mirror_url + \
                   self._shellOutput('grep -A 30 -E "^Package: dpkg$" ' +
                                     lists_dir + self.packagesFile +
                                     ' | grep -E "^Filename:" | head -n 1' +
                                     ' | cut -d\\  -f 2').rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
        lookups.append(d)

        # Lookup the source 'dpkg' package
        src_dir = self._shellOutput('grep -A 30 -E "^Package: dpkg$" ' +
                                    lists_dir + self.sourcesFile +
                                    ' | grep -E "^Directory:" | head -n 1' +
                                    ' | cut -d\\  -f 2').rstrip('\n')
        src_hashes = self._shellOutput('grep -A 20 -E "^Package: dpkg$" ' +
                                       lists_dir + self.sourcesFile +
                                       ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                                       ' | cut -d\\  -f 2').split('\n')[:-1]
        src_paths = self._shellOutput('grep -A 20 -E "^Package: dpkg$" ' +
                                      lists_dir + self.sourcesFile +
                                      ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                                      ' | cut -d\\  -f 4').split('\n')[:-1]

        for src_name, src_hash in zip(src_paths, src_hashes):
            src_path = mirror_url + src_dir + '/' + src_name
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hash)
            lookups.append(d)

        # Lookup a Sources.bz2 file
        idx_hash = self._shellOutput('grep -A 3000 -E "^SHA1:" ' +
                                     lists_dir + self.releaseFile +
                                     ' | grep -E " main/source/Sources.bz2$"'
                                     ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')
        idx_path = release_url + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        # Fail the test if any lookup fails, not only the last one
        allDone = defer.DeferredList(lookups, fireOnOneErrback=True, consumeErrors=True)
        allDone.addErrback(self._unwrapFirstError)
        return allDone

    def tearDown(self):
        """Cancel any still-active pending calls and clean up the manager."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.client.cleanup()
        self.client = None
241