Fix some minor bugs in the previous commits.
[quix0rs-apt-p2p.git] / apt_dht / AptPackages.py
1 # Disable the FutureWarning from the apt module
2 import warnings
3 warnings.simplefilter("ignore", FutureWarning)
4
5 import os, shelve
6 from random import choice
7 from shutil import rmtree
8 from copy import deepcopy
9 from UserDict import DictMixin
10
11 from twisted.internet import threads, defer
12 from twisted.python import log
13 from twisted.python.filepath import FilePath
14 from twisted.trial import unittest
15
16 import apt_pkg, apt_inst
17 from apt import OpProgress
18 from debian_bundle import deb822
19
20 from Hash import HashObject
21
# Initialise the apt_pkg library once, when this module is imported.
apt_pkg.init()

# Lower-cased final path components of the files that PackageFileList keeps
# track of (compared against filename.lower() in update_file).
TRACKED_FILES = ['release', 'sources', 'packages']
25
class PackageFileList(DictMixin):
    """Manages a list of package files belonging to a backend.

    Maps the cache path of each tracked file (a '/'-separated string) to the
    object describing where the file is stored. check_files() requires the
    stored values to provide restat() and exists().

    @type packages: C{shelve dictionary}
    @ivar packages: the files stored for this backend, or None while the
        persistent dictionary is closed
    @ivar cache_dir: the directory that holds the 'packages.db' shelve file
    """

    def __init__(self, cache_dir):
        """Create the cache directory if necessary and open the database.

        @param cache_dir: the directory to keep 'packages.db' in; it must
            provide restat(), exists(), makedirs() and child()
        """
        self.cache_dir = cache_dir
        # Refresh the stat information before testing for existence.
        self.cache_dir.restat(False)
        if not self.cache_dir.exists():
            self.cache_dir.makedirs()
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files in this backend.

        Does nothing if the dictionary is already open.
        """
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir.child('packages.db').path)

    def close(self):
        """Close the persistent dictionary.

        Safe to call more than once, and open() may be called afterwards.
        """
        if self.packages is not None:
            self.packages.close()
            # Forget the closed shelf, otherwise open() would see a non-None
            # value and never reopen the database.
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @param cache_path: the '/'-separated path of the file in the cache
        @param file_path: the value to store for the file
        @return: True if the file is one of TRACKED_FILES and was stored,
            False otherwise
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() in TRACKED_FILES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to make sure they exist.

        Entries whose file no longer exists are removed from the database.
        """
        # Snapshot the keys first: entries are deleted while looping.
        for f in list(self.packages.keys()):
            stored = self.packages[f]
            stored.restat(False)
            if not stored.exists():
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    # Standard dictionary implementation so this class can be used like a dictionary.
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
78
class AptPackages:
    """Uses python-apt to answer queries about packages.

    Makes a fake configuration for python-apt for each backend.

    @cvar DEFAULT_APT_CONFIG: the apt configuration values that are copied
        for every instance and applied to apt_pkg.Config when loading
    @cvar essential_dirs: directories, relative to the cache directory, that
        are created on construction if missing
    @cvar essential_files: files, relative to the cache directory, that are
        created empty on construction if missing
    @ivar cache_dir: the cache directory for this backend
    @ivar apt_config: this instance's copy of the apt configuration
    @ivar packages: the L{PackageFileList} of tracked index files
    @ivar loaded: 1 while the apt caches are loaded, otherwise 0
    @ivar loading: the deferred of a load in progress, or None
    @ivar indexrecords: hashes read from Release files, keyed by the cache
        path of the Release file, then by file name, then by hash type
    """

    DEFAULT_APT_CONFIG = {
        #'APT' : '',
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir':'.', # /
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        # NOTE(review): essential_dirs creates 'apt/cache', not '.apt/cache';
        # confirm whether the leading dot here is intended.
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        'Dir::Bin' : '',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg' : '',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        Creates the essential directories and files below the cache
        directory and opens the list of tracked package files.

        @param cache_dir: the cache directory for this backend; it must
            provide preauthChild() and a path attribute
        """
        # Set the state attributes first so that __del__ and cleanup() work
        # even if the directory setup below raises.
        self.loaded = 0
        self.loading = None
        self.packages = None
        self.cache_dir = cache_dir
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        for dirname in self.essential_dirs:
            path = self.cache_dir.preauthChild(dirname)
            if not path.exists():
                path.makedirs()
        for filename in self.essential_files:
            path = self.cache_dir.preauthChild(filename)
            if not path.exists():
                path.touch()

        self.apt_config['Dir'] = self.cache_dir.path
        self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path
        self.packages = PackageFileList(cache_dir)

    def __del__(self):
        """Unload the caches and close the file list when collected."""
        # getattr: the instance may not have made it through __init__.
        if getattr(self, 'packages', None) is not None:
            self.cleanup()

    def addRelease(self, cache_path, file_path):
        """Dirty hack until python-apt supports apt-pkg/indexrecords.h
        (see Bug #456141)

        Reads the MD5Sum, SHA1 and SHA256 fields of a Release file and
        stores, per listed file name, a (hash, size) tuple under the
        upper-cased field name in self.indexrecords[cache_path].

        @param cache_path: the cache path of the Release file
        @param file_path: the stored Release file; it must provide open()
        """
        records = {}
        self.indexrecords[cache_path] = records

        f = file_path.open('r')
        try:
            rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
            for hash_type in rel:
                for entry in rel[hash_type]:
                    records.setdefault(entry['name'], {})[hash_type.upper()] = (entry[hash_type], entry['size'])
        finally:
            # Close the file even if the Release file cannot be parsed.
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the backend has changed, manage it.

        If this affects us, unload our apt database

        @param cache_path: the cache path of the changed file
        @param file_path: the value to store for the file
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package is initialized and loaded.

        @return: the deferred of the load in progress; a new load is started
            in a thread if none is currently running
        """
        if self.loading is None:
            self.loading = threads.deferToThread(self._load)
            # addBoth rather than addCallback: a failed load must also reset
            # self.loading, otherwise every later call would be handed the
            # same finished deferred and the load would never be retried.
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """Cache is loaded (or the load failed); allow a new load to start.

        @param loadResult: the result or failure of the load
        @return: loadResult unchanged
        """
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and load the packages cache.

        Rewrites sources.list with one line per tracked file, links each
        tracked file into the apt lists directory and then opens the apt
        caches. Run in a thread by load().

        @return: True if the caches are loaded, False if there are no
            tracked files to load
        """
        if self.loaded: return True
        apt_pkg.InitSystem()
        lists_dir = self.cache_dir.preauthChild(self.apt_config['Dir::State']
                                 ).preauthChild(self.apt_config['Dir::State::Lists'])
        # Start again from an empty lists directory.
        if lists_dir.exists():
            lists_dir.remove()
        lists_dir.child('partial').makedirs()
        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
                               ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
        sources_count = 0
        deb_src_added = False
        self.packages.check_files()
        self.indexrecords = {}
        sources = sources_file.open('w')
        try:
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                file_path = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, file_path)
                fake_uri='http://apt-dht'+f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line='deb-src '+fake_dirname+'/ /'
                else:
                    source_line='deb '+fake_dirname+'/ /'
                listpath = lists_dir.child(apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line+'\n')
                log.msg("Sources line: " + source_line)
                sources_count = sources_count + 1

                if listpath.exists():
                    #we should empty the directory instead
                    listpath.remove()
                os.symlink(file_path.path, listpath.path)
        finally:
            # Do not leak the handle if a tracked file cannot be processed.
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
            return False

        log.msg("Loading Packages database for "+self.cache_dir.path)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = 1
        return True

    def unload(self):
        """Tries to make the packages server quit.

        Drops the references to the loaded caches; does nothing if they are
        not loaded.
        """
        if self.loaded:
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = 0

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        if self.packages is not None:
            self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        Returns a deferred so it can make sure the cache is loaded first.

        @param path: the path of the file in the mirror, starting with '/'
        @return: a deferred that fires with a HashObject, which is left
            empty when the lookup fails or finds nothing
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred while trying to find a hash.

        Logs the failure and fires d with an empty HashObject.
        """
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        d.callback(HashObject())

    def _findHash(self, loadResult, path, d):
        """Really find the hash for a path.

        Have to pass the returned loadResult on in case other calls to this
        function are pending.

        Looks in the Release index records first, then in the binary
        package records and finally in the source package records.

        @param loadResult: the result of the load; false if nothing is loaded
        @param path: the path to find the hash for
        @param d: the deferred to fire with the HashObject
        @return: loadResult unchanged
        """
        if not loadResult:
            d.callback(HashObject())
            return loadResult

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # Strip the trailing 'Release' to get the directory prefix
            release_dir = release[:-7]
            if path.startswith(release_dir):
                for indexFile in self.indexrecords[release]:
                    if release_dir + indexFile == path:
                        h = HashObject()
                        h.setFromIndexRecord(self.indexrecords[release][indexFile])
                        d.callback(h)
                        return loadResult

        # The package name is the file name up to the first underscore
        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            h = HashObject()
                            h.setFromPkgRecord(self.records, size)
                            d.callback(h)
                            return loadResult
        except KeyError:
            # Not a known binary package, fall through to the sources
            pass

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.Restart()
            if self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        h = HashObject()
                        h.setFromSrcRecord(f)
                        d.callback(h)
                        return loadResult

        # Nothing matched: report an empty hash
        d.callback(HashObject())
        return loadResult
328
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests read the index files found in /var/lib/apt/lists/ on the
    machine they run on, look up the 'dpkg' package in them and expect the
    Release file to list the 'main/binary-i386/Packages.bz2' index.
    """

    pending_calls = []
    client = None
    timeout = 10
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    def _output(self, command):
        """Run a shell pipeline, returning its output without trailing newlines."""
        return os.popen(command).read().rstrip('\n')

    def _outputLines(self, command):
        """Run a shell pipeline, returning its output as a list of lines."""
        return os.popen(command).read().split('\n')[:-1]

    def _cachePath(self, listName):
        """Convert the name of a file in the apt lists to its cache path."""
        return listName[listName.find('_dists_'):].replace('_','/')

    def _indexPath(self, indexName):
        """Get the cache path of an index file listed in the Release file."""
        # [:-7] strips the trailing 'Release' to leave the directory
        return '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + indexName

    def _indexHash(self, indexName):
        """Get the SHA1 hash the Release file lists for an index file."""
        return self._output('grep -A 3000 -E "^SHA1:" ' +
                            '/var/lib/apt/lists/' + self.releaseFile +
                            ' | grep -E " ' + indexName + '$"' +
                            ' | head -n 1 | cut -d\\  -f 2')

    def _dpkgField(self, listName, field):
        """Get the first value of a field of the dpkg stanza in a list file."""
        return self._output('grep -A 30 -E "^Package: dpkg$" ' +
                            '/var/lib/apt/lists/' + listName +
                            ' | grep -E "^' + field + ':" | head -n 1' +
                            ' | cut -d\\  -f 2')

    def _dpkgSourceColumn(self, column):
        """Get one column of the Files entries of the dpkg source stanza."""
        return self._outputLines('grep -A 20 -E "^Package: dpkg$" ' +
                                 '/var/lib/apt/lists/' + self.sourcesFile +
                                 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                                 ' | cut -d\\  -f ' + str(column))

    def _checkHash(self, path, true_hash):
        """Look up the hash of a path and verify it once it is found."""
        d = self.client.findHash(path)
        d.addCallback(self.verifyHash, path, true_hash)
        return d

    def setUp(self):
        """Register the Release, Packages and Sources files with a new client."""
        self.client = AptPackages(FilePath('/tmp/.apt-dht'))

        self.packagesFile = self._output('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1')
        self.sourcesFile = self._output('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1')
        # Use the Release file that belongs to the chosen Packages file
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        for listName in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listName),
                                     FilePath('/var/lib/apt/lists/' + listName))

    def test_pkg_hash(self):
        """The SHA1 of the dpkg package record matches the Packages file."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """The hashes of the dpkg source record are all in the Sources file."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._dpkgSourceColumn(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """The index record of Packages.bz2 matches the Release file."""
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Fail unless the expected hash that was found is the true one."""
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findIndexHash(self):
        """findHash finds the hash of an index file."""
        return self._checkHash(self._indexPath('main/binary-i386/Packages.bz2'),
                               self._indexHash('main/binary-i386/Packages.bz2'))

    def test_findPkgHash(self):
        """findHash finds the hash of a binary package."""
        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')

        return self._checkHash(pkg_path, pkg_hash)

    def test_findSrcHash(self):
        """findHash finds the hash of a randomly chosen source file."""
        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceColumn(2)
        src_paths = self._dpkgSourceColumn(4)

        i = choice(range(len(src_hashes)))
        return self._checkHash(src_dir + '/' + src_paths[i], src_hashes[i])

    def test_multipleFindHash(self):
        """Several findHash calls made before the cache is loaded all succeed."""
        checks = []

        checks.append(self._checkHash(self._indexPath('main/binary-i386/Packages.bz2'),
                                      self._indexHash('main/binary-i386/Packages.bz2')))

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')
        checks.append(self._checkHash(pkg_path, pkg_hash))

        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceColumn(2)
        src_paths = self._dpkgSourceColumn(4)
        for src_hash, src_path in zip(src_hashes, src_paths):
            checks.append(self._checkHash(src_dir + '/' + src_path, src_hash))

        checks.append(self._checkHash(self._indexPath('main/source/Sources.bz2'),
                                      self._indexHash('main/source/Sources.bz2')))

        # Wait for every lookup, not just the last one, so that a failed
        # verification of any of them fails the test.
        return defer.DeferredList(checks, fireOnOneErrback=True, consumeErrors=True)

    def tearDown(self):
        """Cancel any pending calls and clean up the client."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None