0711802cf94debd10c96869de0cb67f641fae3c6
[quix0rs-apt-p2p.git] / apt_dht / AptPackages.py
1 #
2 # Copyright (C) 2002 Manuel Estrada Sainz <ranty@debian.org>
3 # Copyright (C) 2008 Cameron Dale <camrdale@gmail.com>
4 #
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of version 2.1 of the GNU General Public
7 # License as published by the Free Software Foundation.
8 #
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 # General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
18 # Disable the FutureWarning from the apt module
19 import warnings
20 warnings.simplefilter("ignore", FutureWarning)
21
22 import os, shelve
23 from random import choice
24 from shutil import rmtree
25 from copy import deepcopy
26 from UserDict import DictMixin
27
28 from twisted.internet import threads, defer
29 from twisted.python import log
30 from twisted.python.filepath import FilePath
31 from twisted.trial import unittest
32
33 import apt_pkg, apt_inst
34 from apt import OpProgress
35 from debian_bundle import deb822
36
37 from Hash import HashObject
38
# Initialise the python-apt library once, when this module is imported.
39 apt_pkg.init()
40
# Lower-cased base names of the index files that get registered in the
# package database; PackageFileList.update_file() compares the lower-cased
# last component of a cache path against this list.
41 TRACKED_FILES = ['release', 'sources', 'packages']
42
class PackageFileList(DictMixin):
    """Manages a list of package files belonging to a backend.

    Maps the cache path of each tracked index file (one whose lower-cased
    base name is in C{TRACKED_FILES}) to the value registered for it. The
    mapping is kept in a C{shelve} database named C{packages.db} inside the
    cache directory, so it survives restarts.

    @ivar cache_dir: the directory holding the database; it must offer the
        C{restat}, C{exists}, C{makedirs} and C{child} methods used below
        (presumably a C{twisted.python.filepath.FilePath})
    @type packages: C{shelve dictionary}
    @ivar packages: the files stored for this backend, or None while the
        database is closed
    """

    def __init__(self, cache_dir):
        """Create the cache directory if it is missing and open the database.

        @param cache_dir: the directory to keep the database in
        """
        self.cache_dir = cache_dir
        self.cache_dir.restat(False)
        if not self.cache_dir.exists():
            self.cache_dir.makedirs()
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files in this backend.

        Does nothing if the dictionary is already open.
        """
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir.child('packages.db').path)

    def close(self):
        """Close the persistent dictionary.

        Safe to call more than once; L{open} can be used to reopen it.
        """
        if self.packages is not None:
            self.packages.close()
            # Forget the closed shelf, otherwise open() would consider the
            # database still open and never reopen it
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @param cache_path: the '/'-separated path of the file in the cache
        @param file_path: the value to store for the file (it must be
            picklable and, for L{check_files}, offer C{restat} and C{exists})
        @return: True if the file is tracked and was stored, False otherwise
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() in TRACKED_FILES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to make sure they exist.

        Entries whose file has been deleted are removed from the database.
        """
        # keys() gives a list here, so entries can be deleted while looping
        for f in self.packages.keys():
            # Each shelf lookup unpickles a fresh object, so fetch the entry
            # once and make restat() and exists() work on the same instance
            file_path = self.packages[f]
            file_path.restat(False)
            if not file_path.exists():
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    # Standard dictionary implementation so this class can be used like a dictionary.
    # DictMixin derives the remaining dictionary methods from these four.
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
95
class AptPackages:
    """Uses python-apt to answer queries about packages.

    Makes a fake configuration for python-apt for each backend.

    @type DEFAULT_APT_CONFIG: C{dictionary}
    @cvar DEFAULT_APT_CONFIG: the apt configuration every instance starts
        from (copied, then adjusted, in L{__init__})
    @type essential_dirs: C{tuple} of C{string}
    @cvar essential_dirs: directories created below the cache directory
    @type essential_files: C{tuple} of C{string}
    @cvar essential_files: files created (empty) below the cache directory
    @ivar cache_dir: the cache directory of this backend
    @type apt_config: C{dictionary}
    @ivar apt_config: this instance's copy of the apt configuration
    @type packages: L{PackageFileList}
    @ivar packages: the index files tracked for this backend
    @type loaded: C{int}
    @ivar loaded: 1 while the python-apt caches are loaded, otherwise 0
    @ivar loading: the deferred of the load in progress, or None
    @type indexrecords: C{dictionary}
    @ivar indexrecords: the hashes read from the Release files, keyed by the
        cache path of the Release file, then the index file name, then the
        upper-cased hash name; the values are (hash, size) tuples
    @ivar cache: the python-apt package cache (only while loaded)
    @ivar records: the python-apt package records (only while loaded)
    @ivar srcrecords: the python-apt source records, or None if no Sources
        file is tracked (only while loaded)
    """

    DEFAULT_APT_CONFIG = {
        #'APT' : '',
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir':'.', # /
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        # NOTE(review): '.apt/cache/' does not match the 'apt/cache' entry in
        # essential_dirs -- confirm whether the leading dot is intended
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        'Dir::Bin' : '',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg' : '',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        Creates the directories and files python-apt needs below the cache
        directory and opens the database of tracked index files.

        @param cache_dir: cache directory from config file
        """
        # Set these first so that cleanup(), and therefore __del__, still
        # works if creating the directory layout below raises
        self.packages = None
        self.loaded = 0
        self.loading = None

        self.cache_dir = cache_dir
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        for dir_name in self.essential_dirs:
            path = self.cache_dir.preauthChild(dir_name)
            if not path.exists():
                path.makedirs()
        for file_name in self.essential_files:
            path = self.cache_dir.preauthChild(file_name)
            if not path.exists():
                path.touch()

        self.apt_config['Dir'] = self.cache_dir.path
        # The status file is configured relative to Dir::State; give apt
        # the full path instead
        state_dir = self.cache_dir.preauthChild(self.apt_config['Dir::State'])
        self.apt_config['Dir::State::status'] = state_dir.preauthChild(
            self.apt_config['Dir::State::status']).path
        self.packages = PackageFileList(cache_dir)

    def __del__(self):
        """Release the caches and the database when garbage collected."""
        self.cleanup()

    def addRelease(self, cache_path, file_path):
        """Dirty hack until python-apt supports apt-pkg/indexrecords.h
        (see Bug #456141)

        Reads the MD5Sum, SHA1 and SHA256 sections of a Release file and
        stores the hash and size of every index file listed in them in
        C{self.indexrecords[cache_path]}.

        @param cache_path: the cache path of the Release file
        @param file_path: the Release file to read (must offer C{open})
        """
        records = {}
        self.indexrecords[cache_path] = records

        f = file_path.open('r')
        try:
            rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
            for hash_type in rel:
                for entry in rel[hash_type]:
                    records.setdefault(entry['name'], {})[hash_type.upper()] = (entry[hash_type], entry['size'])
        finally:
            # Don't leak the file handle if the Release file can't be parsed
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the backend has changed, manage it.

        If this affects us, unload our apt database

        @param cache_path: the '/'-separated path of the file in the cache
        @param file_path: the location of the file on disk
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package is initialized and loaded.

        @return: a deferred that fires with the result of L{_load}; calls
            made while a load is in progress all get the same deferred
        """
        if self.loading is None:
            self.loading = threads.deferToThread(self._load)
            # Use addBoth so a failed load is forgotten as well, otherwise
            # the failed deferred would be handed out forever and the load
            # would never be tried again
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """Cache is loaded (or loading failed), allow a new load to start.

        @param loadResult: the result (or failure) of L{_load}
        @return: C{loadResult}, unchanged
        """
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _listsDir(self):
        """Return the directory python-apt reads the index files from."""
        return self.cache_dir.preauthChild(self.apt_config['Dir::State']
                      ).preauthChild(self.apt_config['Dir::State::Lists'])

    def _load(self):
        """Regenerates the fake configuration and load the packages cache.

        Recreates the lists directory with one symlink per tracked file,
        rewrites sources.list to match, then has python-apt build its caches.
        L{load} runs this in a thread.

        @return: True if the caches are loaded, False if there are no
            files to load them from
        """
        if self.loaded: return True
        apt_pkg.InitSystem()

        # Start over with an empty lists directory
        lists_dir = self._listsDir()
        if lists_dir.exists():
            lists_dir.remove()
        lists_dir.child('partial').makedirs()

        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
                               ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
        sources = sources_file.open('w')
        sources_count = 0
        deb_src_added = False
        try:
            self.packages.check_files()
            self.indexrecords = {}
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                file_path = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, file_path)
                fake_uri='http://apt-dht'+f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line='deb-src '+fake_dirname+'/ /'
                else:
                    source_line='deb '+fake_dirname+'/ /'
                listpath = lists_dir.child(apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line+'\n')
                log.msg("Sources line: " + source_line)
                sources_count = sources_count + 1

                if listpath.exists():
                    #we should empty the directory instead
                    listpath.remove()
                # python-apt looks for the index file under the name derived
                # from the fake URI, so link that name to the real file
                os.symlink(file_path.path, listpath.path)
        finally:
            # Close sources.list even if one of the files can't be processed
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
            return False

        log.msg("Loading Packages database for "+self.cache_dir.path)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = 1
        return True

    def unload(self):
        """Tries to make the packages server quit."""
        if self.loaded:
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = 0

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        # packages is still None if the constructor did not finish
        if self.packages is not None:
            self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        Returns a deferred so it can make sure the cache is loaded first.

        @param path: the path of the file in the mirror, starting with '/'
        @return: a deferred that fires with a L{HashObject}, which is left
            empty if the path is not found or an error occurs
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred while trying to find a hash.

        @param failure: the error, from the load or from the lookup
        @param path: the path that was being looked up
        @param d: the deferred to fire with an empty L{HashObject}
        @return: whether the cache is loaded, for the callbacks other
            pending L{findHash} calls added to the same load deferred
        """
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        d.callback(HashObject())
        # The callbacks that follow expect the load result. Returning
        # nothing here would make every lookup queued behind this one
        # come back empty, even though only this lookup failed.
        return bool(self.loaded)

    def _findHash(self, loadResult, path, d):
        """Really find the hash for a path.

        Have to pass the returned loadResult on in case other calls to this
        function are pending.

        @param loadResult: whether the cache could be loaded
        @param path: the path to look up
        @param d: the deferred to fire with the resulting L{HashObject}
        @return: C{loadResult}, unchanged
        """
        if not loadResult:
            d.callback(HashObject())
            return loadResult

        d.callback(self._lookupHash(path))
        return loadResult

    def _lookupHash(self, path):
        """Look a path up in the loaded caches.

        @param path: the path to look up
        @return: a L{HashObject}, left empty if the path is not known
        """
        # First look for the path in the cache of index files
        suffix_len = len('Release')
        for release in self.indexrecords:
            # Index file names in a Release file are relative to the
            # directory the Release file is in
            release_dir = release[:-suffix_len]
            if path.startswith(release_dir):
                record = self.indexrecords[release].get(path[len(release_dir):])
                if record is not None:
                    h = HashObject()
                    h.setFromIndexRecord(record)
                    return h

        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            h = HashObject()
                            h.setFromPkgRecord(self.records, size)
                            return h
        except KeyError:
            # No binary package of that name, try the sources
            pass

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.Restart()
            # Each Lookup() moves on to the next matching source record, so
            # keep going until every version of the source has been checked
            while self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        h = HashObject()
                        h.setFromSrcRecord(f)
                        return h

        return HashObject()
344
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests use the index files found in /var/lib/apt/lists/ on the
    machine running them, and compare the answers of AptPackages with values
    extracted from those files by shell pipelines.

    NOTE(review): the index tests look for 'main/binary-i386/Packages.bz2'
    in the Release file, which looks i386 specific -- confirm on other
    architectures.
    """

    pending_calls = []
    client = None
    timeout = 10
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    def setUp(self):
        """Register the largest 'main' index files and their Release file."""
        self.client = AptPackages(FilePath('/tmp/.apt-dht'))

        self.packagesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').rstrip('\n')
        self.sourcesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').rstrip('\n')
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        for listsName in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listsName),
                                     FilePath('/var/lib/apt/lists/' + listsName))

    def _shell(self, command):
        """Run a shell command and return everything it wrote to stdout."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            pipe.close()

    def _cachePath(self, listsName):
        """Turn the name of a file in apt's lists directory into a cache path."""
        return listsName[listsName.find('_dists_'):].replace('_','/')

    def _indexHash(self, indexFile):
        """Get the SHA1 hash of an index file from the Release file."""
        return self._shell('grep -A 3000 -E "^SHA1:" ' +
                           '/var/lib/apt/lists/' + self.releaseFile +
                           ' | grep -E " ' + indexFile + '$"'
                           ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')

    def _indexPath(self, indexFile):
        """Get the path findHash expects for an index file of the Release file."""
        release_path = self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')
        # Replace the trailing 'Release' with the name of the index file
        return '/' + release_path[:-7] + indexFile

    def _pkgField(self, field):
        """Get the first value of a field of dpkg's entry in the Packages file."""
        return self._shell('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.packagesFile +
                           ' | grep -E "^' + field + ':" | head -n 1' +
                           ' | cut -d\\  -f 2').rstrip('\n')

    def _srcDir(self):
        """Get the directory of dpkg's entry in the Sources file, with a leading '/'."""
        return '/' + self._shell('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -E "^Directory:" | head -n 1' +
                           ' | cut -d\\  -f 2').rstrip('\n')

    def _srcColumn(self, column):
        """Get one column of the Files section of dpkg's entry in the Sources file.

        Column 2 holds the hashes and column 4 the file names.
        """
        return self._shell('grep -A 20 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                           ' | cut -d\\  -f ' + str(column)).split('\n')[:-1]

    def _firstFailure(self, results):
        """Fail with the first failure in the results of a DeferredList."""
        for success, result in results:
            if not success:
                # Returning the failure makes the deferred fail with it
                return result
        return results

    def test_pkg_hash(self):
        """The package records return the SHA1 found in the Packages file."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._pkgField('SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """The source records only return hashes found in the Sources file."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._srcColumn(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """The index records hold the SHA1 found in the Release file."""
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Fail unless the hash that was found for a path is the expected one."""
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findIndexHash(self):
        """findHash finds the hash of an index file."""
        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        return d

    def test_findPkgHash(self):
        """findHash finds the hash of a binary package."""
        pkg_hash = self._pkgField('SHA1')
        pkg_path = '/' + self._pkgField('Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
        return d

    def test_findSrcHash(self):
        """findHash finds the hash of a randomly chosen source file."""
        src_dir = self._srcDir()
        src_hashes = self._srcColumn(2)
        src_paths = self._srcColumn(4)

        i = choice(range(len(src_hashes)))
        d = self.client.findHash(src_dir + '/' + src_paths[i])
        d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])
        return d

    def test_multipleFindHash(self):
        """Several findHash calls made before the cache is loaded all succeed."""
        # Keep every deferred: returning only the last one would let
        # failures of the earlier verifications go unreported
        checks = []

        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        checks.append(d)

        pkg_hash = self._pkgField('SHA1')
        pkg_path = '/' + self._pkgField('Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
        checks.append(d)

        src_dir = self._srcDir()
        src_hashes = self._srcColumn(2)
        src_paths = self._srcColumn(4)

        for i in range(len(src_hashes)):
            d = self.client.findHash(src_dir + '/' + src_paths[i])
            d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])
            checks.append(d)

        idx_hash = self._indexHash('main/source/Sources.bz2')
        idx_path = self._indexPath('main/source/Sources.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        checks.append(d)

        allChecks = defer.DeferredList(checks, consumeErrors=True)
        allChecks.addCallback(self._firstFailure)
        return allChecks

    def tearDown(self):
        """Cancel any pending calls and close the caches."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None