bccb2e53ff4bd3bec7fd6f83415163eb2308c13d
[quix0rs-apt-p2p.git] / apt_dht / MirrorManager.py
1
2 """Manage the multiple mirrors that may be requested.
3
4 @var aptpkg_dir: the name of the directory to use for mirror files
5 """
6
7 from urlparse import urlparse
8 import os
9
10 from twisted.python import log
11 from twisted.python.filepath import FilePath
12 from twisted.internet import defer
13 from twisted.trial import unittest
14 from twisted.web2.http import splitHostPort
15
16 from AptPackages import AptPackages
17
# Name of the sub-directory (created beneath the manager's cache directory)
# under which the per-mirror apt package caches are kept.
aptpkg_dir = 'apt-packages'
19
class MirrorError(Exception):
    """Exception raised when there's a problem with the mirror.

    Used as the failure value when a hash is requested for a mirror that
    has no cache yet.
    """
22
class MirrorManager:
    """Manages all requests for mirror information.

    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type unload_delay: C{int}
    @ivar unload_delay: the time to wait before unloading the apt cache
    @type apt_caches: C{dictionary}
    @ivar apt_caches: the available mirrors, keyed first by site and then
        by base directory, with the L{AptPackages} for that mirror as value
    """

    def __init__(self, cache_dir, unload_delay):
        """Remember the settings; no mirror caches exist initially.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: the directory to use for storing all files
        @type unload_delay: C{int}
        @param unload_delay: passed on to every L{AptPackages} created
        """
        self.cache_dir = cache_dir
        self.unload_delay = unload_delay
        self.apt_caches = {}

    def extractPath(self, url):
        """Break the full URI down into the site, base directory and path.

        Site is the host and port of the mirror. Base directory is the
        directory to the mirror location (usually just '/debian'). Path is
        the remaining path to get to the file.

        E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
        would return ('ftp.debian.org:80', '/debian',
        '/dists/sid/binary-i386/Packages.bz2').

        When the path contains neither '/dists/' nor '/pool/', the base
        directory is guessed by matching the leading directories of the path
        against the base directories already known for the site, falling
        back to an empty base directory.

        @param url: the URI of the file's location on the mirror
        @rtype: (C{string}, C{string}, C{string})
        @return: the site, base directory and path to the file
        """
        # Extract the host and port
        parsed = urlparse(url)
        host, port = splitHostPort(parsed[0], parsed[1])
        site = host + ":" + str(port)
        path = parsed[2]

        # Try to find the base directory (most can be found this way):
        # split at the last '/dists/' or '/pool/', whichever comes later.
        i = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if i >= 0:
            return site, path[:i], path[i:]

        # Uh oh, this is not good
        log.msg("Couldn't find a good base directory for path: %s" % (site + path))

        # Try to find an existing cache that starts with this one
        # (fallback to using an empty base directory).
        #
        # The path starts with '/', so splitting it yields a leading empty
        # component. It must be skipped: otherwise the first match is just
        # '/' and the next probe becomes '//dir', which never matches.
        path_dirs = [name for name in path.split('/') if name]
        baseDir = ''
        for base in self.apt_caches.get(site, ()):
            base_match = ''
            for name in path_dirs:
                if not base.startswith(base_match + '/' + name):
                    break
                base_match += '/' + name
            # Keep the longest match seen over all known base directories
            if len(base_match) > len(baseDir):
                baseDir = base_match
        log.msg("Settled on baseDir: %s" % baseDir)

        # NOTE(review): unlike the '/dists/' and '/pool/' case, the path is
        # returned in full here, still including any matched base directory
        # -- confirm that callers expect this.
        return site, baseDir, path

    def init(self, site, baseDir):
        """Make sure an L{AptPackages} exists for this mirror.

        @param site: the host and port of the mirror
        @param baseDir: the base directory of the mirror on that site
        """
        site_caches = self.apt_caches.setdefault(site, {})
        if baseDir in site_caches:
            return

        # One directory per mirror, named after the site and base directory
        site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
        # Actually create the directory (the call was previously missing);
        # only do so when it is absent, since it may survive from an earlier
        # run.
        if not site_cache.exists():
            site_cache.makedirs()
        site_caches[baseDir] = AptPackages(site_cache, self.unload_delay)

    def updatedFile(self, url, file_path):
        """A file in the mirror has changed or been added.

        The L{AptPackages} for the mirror is created if it does not exist.

        @param url: the URI of the file's location on the mirror
        @param file_path: the location of the downloaded copy of the file,
            passed through unchanged to the mirror's cache
        @see: L{AptPackages.PackageFileList.update_file}
        """
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, file_path)

    def findHash(self, url):
        """Find the hash for a given url.

        @param url: the URI of the file's location on the mirror
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that will fire with the returned L{Hash.HashObject},
            or fail with a L{MirrorError} if no cache exists for the mirror
        """
        site, baseDir, path = self.extractPath(url)
        if site in self.apt_caches and baseDir in self.apt_caches[site]:
            return self.apt_caches[site][baseDir].findHash(path)
        return defer.fail(MirrorError("Site Not Found"))

    def cleanup(self):
        """Clean up every mirror's L{AptPackages} and forget all of them."""
        # Iterate over snapshots of the keys, as entries are deleted from
        # the dictionaries inside the loops.
        for site in list(self.apt_caches.keys()):
            for baseDir in list(self.apt_caches[site].keys()):
                self.apt_caches[site][baseDir].cleanup()
                del self.apt_caches[site][baseDir]
            del self.apt_caches[site]
127     
class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager.

    test_findHash reads the apt index files found in /var/lib/apt/lists on
    the machine running the tests, and shells out to ls/grep/cut to extract
    the expected values from them.
    """

    timeout = 20
    pending_calls = []
    client = None

    def setUp(self):
        """Create a fresh manager for each test."""
        # Per-instance list, so that one test never sees (or cancels)
        # calls that belong to another test through the class attribute.
        self.pending_calls = []
        self.client = MirrorManager(FilePath('/tmp/.apt-dht'), 300)

    def test_extractPath(self):
        """Test extracting the site and base directory from various mirrors."""
        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
        self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
        self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
        self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

    def verifyHash(self, found_hash, path, true_hash):
        """Check that the hash found for a path is the expected one.

        @param found_hash: the result of the findHash lookup
        @param path: the URI that was looked up (only used in the message)
        @param true_hash: the expected hash in hexadecimal
        """
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def _shellOutput(self, command):
        """Run a shell command and return everything it wrote to stdout."""
        return os.popen(command).read()

    def test_findHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        lists_dir = '/var/lib/apt/lists/'

        # Find the largest index files that are for 'main'
        self.packagesFile = self._shellOutput('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').rstrip('\n')
        self.sourcesFile = self._shellOutput('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').rstrip('\n')

        # Find the Release file corresponding to the found Packages file
        self.releaseFile = None
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break
        # Fail with a clear message instead of an AttributeError further down
        self.failUnless(self.releaseFile is not None,
                        "no Release file found for: %s" % self.packagesFile)

        # The list file names are the URIs with '/' replaced by '_', so the
        # part up to (and including the '_' before) 'dists' gives the mirror.
        mirror = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/')
        # The directory holding the Release file (name minus 'Release')
        release_dir = 'http://' + self.releaseFile.replace('_','/')[:-7]

        # Add all the found files to the mirror
        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
                                FilePath(lists_dir + self.releaseFile))
        self.client.updatedFile(mirror + self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath(lists_dir + self.packagesFile))
        self.client.updatedFile(mirror + self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath(lists_dir + self.sourcesFile))

        # Every lookup is collected here so that a failure in any of them
        # fails the test, not only a failure in the last one.
        lookups = []

        # Lookup a Packages.bz2 file
        # ('\\ ' is a backslash and a space: an escaped space for the shell)
        idx_hash = self._shellOutput('grep -A 3000 -E "^SHA1:" ' +
                                     lists_dir + self.releaseFile +
                                     ' | grep -E " main/binary-i386/Packages.bz2$"'
                                     ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')
        idx_path = release_dir + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        # Lookup the binary 'dpkg' package
        pkg_hash = self._shellOutput('grep -A 30 -E "^Package: dpkg$" ' +
                                     lists_dir + self.packagesFile +
                                     ' | grep -E "^SHA1:" | head -n 1' +
                                     ' | cut -d\\  -f 2').rstrip('\n')
        pkg_path = mirror + \
                   self._shellOutput('grep -A 30 -E "^Package: dpkg$" ' +
                                     lists_dir + self.packagesFile +
                                     ' | grep -E "^Filename:" | head -n 1' +
                                     ' | cut -d\\  -f 2').rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
        lookups.append(d)

        # Lookup the source 'dpkg' package
        src_dir = self._shellOutput('grep -A 30 -E "^Package: dpkg$" ' +
                                    lists_dir + self.sourcesFile +
                                    ' | grep -E "^Directory:" | head -n 1' +
                                    ' | cut -d\\  -f 2').rstrip('\n')
        # The output ends with a newline, so drop the empty last element
        src_hashes = self._shellOutput('grep -A 20 -E "^Package: dpkg$" ' +
                                       lists_dir + self.sourcesFile +
                                       ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                                       ' | cut -d\\  -f 2').split('\n')[:-1]
        src_paths = self._shellOutput('grep -A 20 -E "^Package: dpkg$" ' +
                                      lists_dir + self.sourcesFile +
                                      ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                                      ' | cut -d\\  -f 4').split('\n')[:-1]

        for src_name, src_hash in zip(src_paths, src_hashes):
            src_path = mirror + src_dir + '/' + src_name
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hash)
            lookups.append(d)

        # Lookup a Sources.bz2 file
        idx_hash = self._shellOutput('grep -A 3000 -E "^SHA1:" ' +
                                     lists_dir + self.releaseFile +
                                     ' | grep -E " main/source/Sources.bz2$"'
                                     ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')
        idx_path = release_dir + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        # Fires once all lookups have succeeded, or fails as soon as one does
        return defer.DeferredList(lookups, fireOnOneErrback=True, consumeErrors=True)

    def tearDown(self):
        """Cancel any calls still pending and clean up the manager."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.client.cleanup()
        self.client = None
245