Fixed some bugs in the new hashing scheme and tests.
[quix0rs-apt-p2p.git] / apt_dht / AptPackages.py
1 # Disable the FutureWarning from the apt module
2 import warnings
3 warnings.simplefilter("ignore", FutureWarning)
4
5 import os, shelve
6 from random import choice
7 from shutil import rmtree
8 from copy import deepcopy
9 from UserDict import DictMixin
10
11 from twisted.internet import threads, defer
12 from twisted.python import log
13 from twisted.trial import unittest
14
15 import apt_pkg, apt_inst
16 from apt import OpProgress
17
18 from Hash import HashObject
19
# Initialise the python-apt bindings once, when this module is imported.
apt_pkg.init()

# Lower-cased basenames of the index files that PackageFileList.update_file()
# will register; any other file name is ignored.
TRACKED_FILES = ['release', 'sources', 'packages']
23
class PackageFileList(DictMixin):
    """Manages a list of package files belonging to a backend.

    The mapping of cache path -> path of the file on disk is persisted in a
    shelve database named 'packages.db' inside the cache directory.

    @type packages: C{shelve dictionary}
    @ivar packages: the files stored for this backend, or None while the
        database is closed
    @ivar cache_dir: the directory holding the database
    """

    def __init__(self, cache_dir):
        """Create the cache directory if needed and open the database.

        @param cache_dir: directory in which 'packages.db' is kept
        """
        self.cache_dir = cache_dir
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files in this backend.

        Does nothing if the dictionary is already open.
        """
        if self.packages is None:
            self.packages = shelve.open(os.path.join(self.cache_dir, 'packages.db'))

    def close(self):
        """Close the persistent dictionary.

        Safe to call more than once; the dictionary can be reopened with open().
        """
        if self.packages is not None:
            self.packages.close()
            # Forget the closed shelf so that open() really reopens it and a
            # second close() is a no-op.
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @param cache_path: the path of the file within the mirror
        @param file_path: the location of the file on disk
        @return: True if the file is one of TRACKED_FILES and was registered,
            False otherwise
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() in TRACKED_FILES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to make sure they exist.

        Entries whose file is no longer on disk are removed from the database.
        """
        # Iterate over a snapshot of the keys, entries are deleted in the loop.
        for f in list(self.packages.keys()):
            if not os.path.exists(self.packages[f]):
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    # Standard dictionary implementation so this class can be used like a dictionary.
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
74
class AptPackages:
    """Uses python-apt to answer queries about packages.

    Makes a fake configuration for python-apt for each backend.

    @ivar cache_dir: the directory holding the fake apt tree
    @ivar apt_config: the apt configuration used when loading the cache
    @ivar packages: the L{PackageFileList} of tracked index files
    @ivar loaded: non-zero while the apt cache is loaded
    @ivar loading: the deferred of a load in progress, or None
    @ivar indexrecords: the hashes read from the tracked Release files, keyed
        by the Release file's cache path, then by index file name, then by
        hash type; each value is a (hash, size) tuple of strings
    """

    # NOTE(review): 'Dir::Cache' is '.apt/cache/' while essential_dirs creates
    # 'apt/cache' -- the leading dot looks like a typo, confirm before changing.
    DEFAULT_APT_CONFIG = {
        #'APT' : '',
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir':'.', # /
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        'Dir::Bin' : '',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg' : '',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        Creates the essential directories and (empty) files of the fake apt
        tree below the cache directory if they do not exist yet.

        @param cache_dir: cache directory from config file
        """
        self.cache_dir = cache_dir
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        for dirname in self.essential_dirs:
            path = os.path.join(self.cache_dir, dirname)
            if not os.path.exists(path):
                os.makedirs(path)
        for filename in self.essential_files:
            path = os.path.join(self.cache_dir, filename)
            if not os.path.exists(path):
                # Only an empty placeholder is needed
                open(path, 'w').close()

        self.apt_config['Dir'] = self.cache_dir
        self.apt_config['Dir::State::status'] = os.path.join(self.cache_dir,
                      self.apt_config['Dir::State'], self.apt_config['Dir::State::status'])
        self.packages = PackageFileList(cache_dir)
        self.loaded = 0
        self.loading = None

    def __del__(self):
        """Release the caches and the package list when garbage collected."""
        # __init__ may have failed before the package list was created, in
        # which case there is nothing to clean up.
        if getattr(self, 'packages', None) is not None:
            self.cleanup()

    def addRelease(self, cache_path, file_path):
        """Dirty hack until python-apt supports apt-pkg/indexrecords.h
        (see Bug #456141)

        Reads the MD5Sum, SHA1 and SHA256 sections of a Release file and
        stores the (hash, size) found for every index file in
        self.indexrecords[cache_path][index file name][hash type].

        @param cache_path: the path of the Release file within the mirror
        @param file_path: the location of the Release file on disk
        """
        records = {}
        self.indexrecords[cache_path] = records

        # The hash section currently being read, None outside of one
        hash_type = None
        f = open(file_path, 'r')
        try:
            for line in f:
                line = line.rstrip()

                if line[:1] != " ":
                    # Any header line ends the previous hash section
                    hash_type = None
                    try:
                        # Read the various headers from the file
                        h, _value = line.split(":", 1)
                    except ValueError:
                        # Bad header line, just ignore it
                        log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
                        continue
                    if h in ("MD5Sum", "SHA1", "SHA256"):
                        hash_type = h
                    continue

                # Read file names from the multiple hash sections of the file
                if hash_type is not None:
                    p = line.split()
                    if len(p) < 3:
                        # Expected "<hash> <size> <file name>"
                        log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
                        continue
                    records.setdefault(p[2], {})[hash_type] = (p[0], p[1])
        finally:
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the backend has changed, manage it.

        If this affects us, unload our apt database

        @param cache_path: the path of the file within the mirror
        @param file_path: the location of the file on disk
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package is initialized and loaded.

        @return: a deferred firing with the result of L{_load}; while a load
            is in progress every caller gets the same deferred
        """
        if self.loading is None:
            self.loading = threads.deferToThread(self._load)
            # addBoth so that a failed load is forgotten as well, otherwise it
            # would never be retried.
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """Cache is loaded (or the load failed), forget the load in progress."""
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and load the packages cache.

        Called in a thread by L{load}.

        @return: True if the cache is loaded, False if there are no tracked
            files to load
        """
        if self.loaded: return True
        apt_pkg.InitSystem()

        # Start from an empty lists directory
        lists_dir = os.path.join(self.cache_dir, self.apt_config['Dir::State'],
                                 self.apt_config['Dir::State::Lists'])
        if os.path.exists(lists_dir):
            rmtree(lists_dir)
        os.makedirs(os.path.join(lists_dir, 'partial'))

        sources_filename = os.path.join(self.cache_dir, self.apt_config['Dir::Etc'],
                                        self.apt_config['Dir::Etc::sourcelist'])
        sources = open(sources_filename, 'w')
        sources_count = 0
        deb_src_added = False
        try:
            self.packages.check_files()
            self.indexrecords = {}
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                filepath = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, filepath)
                fake_uri = 'http://apt-dht' + f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line = 'deb-src ' + fake_dirname + '/ /'
                else:
                    source_line = 'deb ' + fake_dirname + '/ /'
                listpath = os.path.join(lists_dir, apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line + '\n')
                log.msg("Sources line: " + source_line)
                sources_count = sources_count + 1

                try:
                    #we should empty the directory instead
                    os.unlink(listpath)
                except OSError:
                    # Nothing to remove
                    pass
                # Make the tracked file appear in the fake lists directory
                os.symlink(filepath, listpath)
        finally:
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir))
            return False

        log.msg("Loading Packages database for "+self.cache_dir)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = 1
        return True

    def unload(self):
        """Tries to make the packages server quit."""
        if self.loaded:
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = 0

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        Returns a deferred so it can make sure the cache is loaded first.

        @param path: the path of the file within the mirror
        @return: a deferred firing with a L{HashObject}, which is a freshly
            constructed one if the path was not found or an error occurred
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred while trying to find a hash.

        @return: whether the cache is loaded, so that other lookups pending on
            the same load are not affected by this lookup's error
        """
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        d.callback(HashObject())
        return bool(self.loaded)

    def _findHash(self, loadResult, path, d):
        """Really find the hash for a path.

        Have to pass the returned loadResult on in case other calls to this
        function are pending.

        @param loadResult: the result of L{load}, false if nothing is loaded
        @param path: the path of the file within the mirror
        @param d: the deferred to call with the resulting L{HashObject}
        """
        if not loadResult:
            d.callback(HashObject())
            return loadResult

        # First look for the path in the cache of index files
        h = self._hashFromIndexRecords(path)
        if h is None:
            # The package name is the file name up to the first underscore
            package = path.split('/')[-1].split('_')[0]
            h = self._hashFromBinaryRecords(path, package)
            if h is None:
                h = self._hashFromSourceRecords(path, package)
        if h is None:
            # Not found anywhere
            h = HashObject()

        d.callback(h)
        return loadResult

    def _hashFromIndexRecords(self, path):
        """Look for the path among the index files of the Release files."""
        for release in self.indexrecords:
            # Strip the trailing "Release" to get the directory of the release
            base = release[:-7]
            if path.startswith(base):
                indexFile = path[len(base):]
                if indexFile in self.indexrecords[release]:
                    h = HashObject()
                    h.setFromIndexRecord(self.indexrecords[release][indexFile])
                    return h
        return None

    def _hashFromBinaryRecords(self, path, package):
        """Look for the path among the versions of a binary package."""
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            h = HashObject()
                            h.setFromPkgRecord(self.records, size)
                            return h
        except KeyError:
            # The package is not in the cache
            pass
        return None

    def _hashFromSourceRecords(self, path, package):
        """Look for the path among the files of a source package."""
        if not self.srcrecords:
            return None
        self.srcrecords.Restart()
        if self.srcrecords.Lookup(package):
            for f in self.srcrecords.Files:
                if path == '/' + f[2]:
                    h = HashObject()
                    h.setFromSrcRecord(f)
                    return h
        return None
346
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests use the apt index files found in listsDir on the machine they
    run on, and compare the results of AptPackages with values extracted from
    those files by shell pipelines.
    """

    pending_calls = []
    client = None
    timeout = 10
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''
    # Where the index files used by the tests are read from
    listsDir = '/var/lib/apt/lists/'

    def setUp(self):
        """Register a Release, a Packages and a Sources file with a new client."""
        self.client = AptPackages('/tmp/.apt-dht')

        self.packagesFile = self._lastListFile('Packages')
        self.sourcesFile = self._lastListFile('Sources')
        # Use the Release file belonging to the chosen Packages file
        for f in os.walk(self.listsDir).next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        for listFile in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listFile),
                                     self.listsDir + listFile)

    def _run(self, command):
        """Run a shell pipeline and return everything it wrote to stdout."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            pipe.close()

    def _lastListFile(self, suffix):
        """Return the last of the size-sorted 'main' list files ending in suffix."""
        return self._run('ls -Sr ' + self.listsDir +
                         ' | grep -E "_main_.*' + suffix + '$" | tail -n 1').rstrip('\n')

    def _cachePath(self, listFile):
        """Convert the name of a list file to the path it has in the mirror."""
        return listFile[listFile.find('_dists_'):].replace('_','/')

    def _indexPath(self, indexFile):
        """Return the mirror path of an index file listed in the Release file."""
        release = self.releaseFile
        # The [:-7] strips the trailing "Release"
        return '/' + release[release.find('_dists_')+1:].replace('_','/')[:-7] + indexFile

    def _releaseHash(self, indexFile):
        """Read the SHA1 of an index file from the Release file."""
        return self._run('grep -A 3000 -E "^SHA1:" ' +
                         self.listsDir + self.releaseFile +
                         ' | grep -E " ' + indexFile + '$"'
                         ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')

    def _dpkgField(self, listFile, field):
        """Read a field of the first dpkg stanza found in a list file."""
        return self._run('grep -A 30 -E "^Package: dpkg$" ' +
                         self.listsDir + listFile +
                         ' | grep -E "^' + field + ':" | head -n 1' +
                         ' | cut -d\\  -f 2').rstrip('\n')

    def _dpkgSourceColumn(self, column):
        """Read one column of the Files section of dpkg in the Sources file.

        Column 2 holds the hashes and column 4 the file names.
        """
        return self._run('grep -A 20 -E "^Package: dpkg$" ' +
                         self.listsDir + self.sourcesFile +
                         ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                         ' | cut -d\\  -f ' + str(column)).split('\n')[:-1]

    def _checkHash(self, path, true_hash):
        """Look up the hash of a path and verify it once it is found.

        Returns the deferred of the lookup, which fails if the hash is wrong.
        """
        d = self.client.findHash(path)
        d.addCallback(self.verifyHash, path, true_hash)
        return d

    def test_pkg_hash(self):
        """The binary package records have the SHA1 of the Packages file."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """The source package records have the hashes of the Sources file."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._dpkgSourceColumn(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """The index records have the SHA1 of the Release file."""
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._releaseHash('main/binary-i386/Packages.bz2')

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Fail unless the found hash is the one expected for the path."""
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findIndexHash(self):
        """findHash finds the hash of an index file."""
        idx_hash = self._releaseHash('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        return self._checkHash(idx_path, idx_hash)

    def test_findPkgHash(self):
        """findHash finds the hash of a binary package."""
        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')

        return self._checkHash(pkg_path, pkg_hash)

    def test_findSrcHash(self):
        """findHash finds the hash of a randomly chosen source package file."""
        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceColumn(2)
        src_paths = self._dpkgSourceColumn(4)

        i = choice(range(len(src_hashes)))
        return self._checkHash(src_dir + '/' + src_paths[i], src_hashes[i])

    def test_multipleFindHash(self):
        """findHash copes with many lookups requested at the same time."""
        lookups = []

        idx_hash = self._releaseHash('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')
        lookups.append(self._checkHash(idx_path, idx_hash))

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')
        lookups.append(self._checkHash(pkg_path, pkg_hash))

        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceColumn(2)
        src_paths = self._dpkgSourceColumn(4)
        for i in range(len(src_hashes)):
            lookups.append(self._checkHash(src_dir + '/' + src_paths[i], src_hashes[i]))

        idx_hash = self._releaseHash('main/source/Sources.bz2')
        idx_path = self._indexPath('main/source/Sources.bz2')
        lookups.append(self._checkHash(idx_path, idx_hash))

        # Wait for every lookup, so that a wrong hash in any of them (and not
        # only in the last one) fails the test.
        return defer.DeferredList(lookups, fireOnOneErrback=True, consumeErrors=True)

    def tearDown(self):
        """Cancel any pending calls and release the client's caches."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None