79f577ba6d8dc3e4cf8dbcafb5b978245c09b84b
[quix0rs-apt-p2p.git] / apt_dht / MirrorManager.py
1
2 """Manage the multiple mirrors that may be requested.
3
4 @var aptpkg_dir: the name of the directory to use for mirror files
5 """
6
7 from urlparse import urlparse
8 import os
9
10 from twisted.python import log
11 from twisted.python.filepath import FilePath
12 from twisted.internet import defer
13 from twisted.trial import unittest
14 from twisted.web2.http import splitHostPort
15
16 from AptPackages import AptPackages
17
# Name of the subdirectory (under the manager's cache_dir) that holds the
# per-mirror AptPackages cache directories.
aptpkg_dir='apt-packages'
19
class MirrorError(Exception):
    """Raised when a mirror request cannot be satisfied."""
22
class MirrorManager:
    """Manages all requests for mirror information.
    
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type unload_delay: C{int}
    @ivar unload_delay: the time to wait before unloading the apt cache
    @type apt_caches: C{dictionary}
    @ivar apt_caches: the available mirrors, keyed by site and then by
        base directory, mapping to L{AptPackages} instances
    """
    
    def __init__(self, cache_dir, unload_delay):
        """Initialize the mirror manager.
        
        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: the directory to use for storing all files
        @type unload_delay: C{int}
        @param unload_delay: the time to wait before unloading the apt cache
        """
        self.cache_dir = cache_dir
        self.unload_delay = unload_delay
        # Nested dict: site -> baseDir -> AptPackages, populated lazily by init()
        self.apt_caches = {}
    
    def extractPath(self, url):
        """Break the full URI down into the site, base directory and path.
        
        Site is the host and port of the mirror. Base directory is the
        directory to the mirror location (usually just '/debian'). Path is
        the remaining path to get to the file.
        
        E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
        would return ('ftp.debian.org:80', '/debian', 
        '/dists/sid/binary-i386/Packages.bz2').
        
        @param url: the URI of the file's location on the mirror
        @rtype: (C{string}, C{string}, C{string})
        @return: the site, base directory and path to the file
        """
        # Extract the host and port (the port defaults based on the scheme)
        parsed = urlparse(url)
        host, port = splitHostPort(parsed[0], parsed[1])
        site = host + ":" + str(port)
        path = parsed[2]

        # Try to find the base directory (most can be found this way, since
        # Debian archives always place files under /dists/ or /pool/)
        i = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if i >= 0:
            baseDir = path[:i]
            path = path[i:]
        else:
            # Uh oh, this is not good
            log.msg("Couldn't find a good base directory for path: %s" % (site + path))
            
            # Try to find an existing cache that starts with this one
            # (fallback to using an empty base directory)
            baseDir = ''
            if site in self.apt_caches:
                longest_match = 0
                for base in self.apt_caches[site]:
                    base_match = ''
                    for dirs in path.split('/'):
                        # BUG FIX: skip empty segments (the leading '/' in the
                        # path yields one).  Previously the empty segment made
                        # base_match '/' and every later candidate prefix
                        # '//...', which can never match an existing base
                        # directory, so the longest-prefix search was useless.
                        if not dirs:
                            continue
                        if base.startswith(base_match + '/' + dirs):
                            base_match += '/' + dirs
                        else:
                            break
                    if len(base_match) > longest_match:
                        longest_match = len(base_match)
                        baseDir = base_match
            log.msg("Settled on baseDir: %s" % baseDir)
        
        return site, baseDir, path
        
    def init(self, site, baseDir):
        """Make sure an L{AptPackages} exists for this mirror.
        
        @param site: the host and port of the mirror ('host:port')
        @param baseDir: the mirror's base directory ('' or '/debian', etc.)
        """
        if site not in self.apt_caches:
            self.apt_caches[site] = {}
            
        if baseDir not in self.apt_caches[site]:
            site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
            # BUG FIX: the original read 'site_cache.makedirs' without the
            # call parentheses, so the bound method was never invoked and the
            # cache directory was never created.  Guard with exists() since
            # FilePath.makedirs() raises if the directory is already there.
            if not site_cache.exists():
                site_cache.makedirs()
            self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay)
    
    def updatedFile(self, url, file_path):
        """A file in the mirror has changed or been added.
        
        @param url: the URI of the file's location on the mirror
        @param file_path: the local path to the downloaded copy of the file
        @see: L{AptPackages.PackageFileList.update_file}
        """
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, file_path)

    def findHash(self, url):
        """Find the hash for a given url.

        @param url: the URI of the file's location on the mirror
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that will fire with the returned L{Hash.HashObject},
            or errback with L{MirrorError} if no cache exists for the site
        """
        site, baseDir, path = self.extractPath(url)
        if site in self.apt_caches and baseDir in self.apt_caches[site]:
            return self.apt_caches[site][baseDir].findHash(path)
        # No AptPackages cache has been initialized for this mirror
        d = defer.Deferred()
        d.errback(MirrorError("Site Not Found"))
        return d
120     
class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager.
    
    These tests read the host's real APT metadata from /var/lib/apt/lists
    via shell pipelines, so they only work on a Debian-style system with
    populated apt lists.
    """
    
    # Trial's per-test timeout, in seconds
    timeout = 20
    # NOTE(review): these are shared class-level attributes, not per-instance
    # ones; pending_calls in particular is a mutable list shared by all test
    # instances -- confirm that is intended.
    pending_calls = []
    client = None
    
    def setUp(self):
        # Fresh MirrorManager per test, caching under /tmp with a 300s
        # apt-cache unload delay
        self.client = MirrorManager(FilePath('/tmp/.apt-dht'), 300)
        
    def test_extractPath(self):
        """Test extracting the site and base directory from various mirrors."""
        # Standard mirror URL: port defaults to 80, '/debian' is the base
        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
        self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

        # Explicit port must be preserved in the site; '/pool/' also marks
        # the start of the path
        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
        self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)

        # Mirror rooted at '/' -> empty base directory
        site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
        self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

    def verifyHash(self, found_hash, path, true_hash):
        # Callback helper: compare the hex digest expected by the found hash
        # object against the known-true hash string
        self.failUnless(found_hash.hexexpected() == true_hash, 
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        # Find the largest index files that are for 'main'
        # (ls -Sr sorts by size, smallest first, so tail -n 1 is the largest)
        self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
        self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
        
        # Find the Release file corresponding to the found Packages file
        # (apt list filenames share a common underscore-separated prefix)
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break
        
        # Add all the found files to the mirror; apt list filenames encode
        # the mirror URL with '_' in place of '/', so reversing the
        # substitution reconstructs the original URL
        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'), 
                                FilePath('/var/lib/apt/lists/' + self.releaseFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'), 
                                FilePath('/var/lib/apt/lists/' + self.packagesFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'), 
                                FilePath('/var/lib/apt/lists/' + self.sourcesFile))

        # Fired by the last lookup's callback so trial waits for everything
        lastDefer = defer.Deferred()
        
        # Lookup a Packages.bz2 file: scrape its SHA1 out of the Release file
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + 
                            '/var/lib/apt/lists/' + self.releaseFile + 
                            ' | grep -E " main/binary-i386/Packages.bz2$"'
                            ' | head -n 1 | cut -d\  -f 2').read().rstrip('\n')
        # The Release URL minus the trailing 'Release' is the dists directory
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Lookup the binary 'dpkg' package: SHA1 and Filename come from its
        # stanza in the Packages file
        pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.packagesFile + 
                            ' | grep -E "^SHA1:" | head -n 1' + 
                            ' | cut -d\  -f 2').read().rstrip('\n')
        pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
                   os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.packagesFile + 
                            ' | grep -E "^Filename:" | head -n 1' + 
                            ' | cut -d\  -f 2').read().rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        # Lookup the source 'dpkg' package: the Sources stanza lists a
        # Directory plus a Files section of '<hash> <size> <name>' lines
        src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.sourcesFile + 
                            ' | grep -E "^Directory:" | head -n 1' + 
                            ' | cut -d\  -f 2').read().rstrip('\n')
        src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.sourcesFile + 
                            ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + 
                            ' | cut -d\  -f 2').read().split('\n')[:-1]
        src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' + 
                            '/var/lib/apt/lists/' + self.sourcesFile + 
                            ' | grep -A 4 -E "^Files:" | grep -E "^ " ' + 
                            ' | cut -d\  -f 4').read().split('\n')[:-1]

        # Verify every file listed in the source package's Files section
        for i in range(len(src_hashes)):
            src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])
            
        # Lookup a Sources.bz2 file (same Release-file scrape as above)
        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + 
                            '/var/lib/apt/lists/' + self.releaseFile + 
                            ' | grep -E " main/source/Sources.bz2$"'
                            ' | head -n 1 | cut -d\  -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Chain the final lookup into lastDefer so the test waits for it
        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        # Cancel any still-active delayed calls and drop the client
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.client = None
237