Add cleanup to the MirrorManager for its AptPackages caches.
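The new cleanup() call tears down every per-mirror AptPackages cache the manager created. A rough sketch of the intended call site, reusing the values from the test fixture below (the variable name is illustrative; the cache path and 300-second unload delay are the ones setUp uses):

    manager = MirrorManager(FilePath('/tmp/.apt-dht'), 300)
    # ... files get updated and hashes get looked up ...
    manager.cleanup()    # release all of the AptPackages caches on shutdown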
[quix0rs-apt-p2p.git] / apt_dht / MirrorManager.py
from urlparse import urlparse
import os

from twisted.python import log
from twisted.python.filepath import FilePath
from twisted.internet import defer
from twisted.trial import unittest
from twisted.web2.http import splitHostPort

from AptPackages import AptPackages

aptpkg_dir = 'apt-packages'

class MirrorError(Exception):
    """Exception raised when there's a problem with the mirror."""

class MirrorManager:
    """Manages all requests for mirror objects."""

    def __init__(self, cache_dir, unload_delay):
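        """Initialize the manager with the cache directory to use (a FilePath)
        and the unload delay to pass on to each AptPackages instance."""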
        self.cache_dir = cache_dir
        self.unload_delay = unload_delay
        self.apt_caches = {}

    def extractPath(self, url):
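        """Split a mirror URL into the site (host:port), the base directory of
        the mirror, and the path to the requested file within the mirror."""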
        parsed = urlparse(url)
        host, port = splitHostPort(parsed[0], parsed[1])
        site = host + ":" + str(port)
        path = parsed[2]

        i = max(path.rfind('/dists/'), path.rfind('/pool/'))
        if i >= 0:
            baseDir = path[:i]
            path = path[i:]
        else:
            # No /dists/ or /pool/ in the path, so fall back to the known base
            # directory that shares the longest leading path with the request
            log.msg("Couldn't find a good base directory for path: %s" % (site + path))
            baseDir = ''
            if site in self.apt_caches:
                longest_match = 0
                for base in self.apt_caches[site]:
                    base_match = ''
                    for dirs in path.split('/'):
                        if base.startswith(base_match + '/' + dirs):
                            base_match += '/' + dirs
                        else:
                            break
                    if len(base_match) > longest_match:
                        longest_match = len(base_match)
                        baseDir = base_match
            log.msg("Settled on baseDir: %s" % baseDir)

        return site, baseDir, path

    def init(self, site, baseDir):
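        """Make sure an AptPackages cache exists for the site and base
        directory, creating it if necessary."""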
        if site not in self.apt_caches:
            self.apt_caches[site] = {}

        if baseDir not in self.apt_caches[site]:
            site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
            # Make sure the cache directory exists before handing it to AptPackages
            if not site_cache.exists():
                site_cache.makedirs()
            self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay)

    def updatedFile(self, url, file_path):
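        """Hand a downloaded file off to the AptPackages cache for its mirror."""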
        site, baseDir, path = self.extractPath(url)
        self.init(site, baseDir)
        self.apt_caches[site][baseDir].file_updated(path, file_path)

    def findHash(self, url):
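        """Return a deferred hash lookup for the URL, or an errback with a
        MirrorError if no cache knows about the mirror."""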
        site, baseDir, path = self.extractPath(url)
        if site in self.apt_caches and baseDir in self.apt_caches[site]:
            return self.apt_caches[site][baseDir].findHash(path)
        d = defer.Deferred()
        d.errback(MirrorError("Site Not Found"))
        return d

    def cleanup(self):
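        """Clean up and remove all of the AptPackages caches."""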
        for site in self.apt_caches.keys():
            for baseDir in self.apt_caches[site].keys():
                self.apt_caches[site][baseDir].cleanup()
                del self.apt_caches[site][baseDir]
            del self.apt_caches[site]

class TestMirrorManager(unittest.TestCase):
    """Unit tests for the mirror manager."""

    timeout = 20
    pending_calls = []
    client = None

    def setUp(self):
        self.client = MirrorManager(FilePath('/tmp/.apt-dht'), 300)

    def test_extractPath(self):
        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
        self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org:16999/debian/pool/d/dpkg/dpkg_1.2.1-1.tar.gz')
        self.failUnless(site == "ftp.us.debian.org:16999", "no match: %s" % site)
        self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
        self.failUnless(path == "/pool/d/dpkg/dpkg_1.2.1-1.tar.gz", "no match: %s" % path)

        site, baseDir, path = self.client.extractPath('http://debian.camrdale.org/dists/unstable/Release')
        self.failUnless(site == "debian.camrdale.org:80", "no match: %s" % site)
        self.failUnless(baseDir == "", "no match: %s" % baseDir)
        self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)

    def verifyHash(self, found_hash, path, true_hash):
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findHash(self):
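        # Use the Release, Packages and Sources files already downloaded by
        # the local apt to check that the hashes parsed from them match.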
        self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
        self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.releaseFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.packagesFile))
        self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
                                FilePath('/var/lib/apt/lists/' + self.sourcesFile))

        lastDefer = defer.Deferred()

        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/binary-i386/Packages.bz2$"'
                            ' | head -n 1 | cut -d\  -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^SHA1:" | head -n 1' +
                            ' | cut -d\  -f 2').read().rstrip('\n')
        pkg_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + \
                   os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.packagesFile +
                            ' | grep -E "^Filename:" | head -n 1' +
                            ' | cut -d\  -f 2').read().rstrip('\n')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.sourcesFile +
                            ' | grep -E "^Directory:" | head -n 1' +
                            ' | cut -d\  -f 2').read().rstrip('\n')
        src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.sourcesFile +
                            ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                            ' | cut -d\  -f 2').read().split('\n')[:-1]
        src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + self.sourcesFile +
                            ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                            ' | cut -d\  -f 4').read().split('\n')[:-1]

        for i in range(len(src_hashes)):
            src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])

        idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " main/source/Sources.bz2$"'
                            ' | head -n 1 | cut -d\  -f 2').read().rstrip('\n')
        idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.client.cleanup()
        self.client = None