Remove the originated time from the DHT value storage.
[quix0rs-apt-p2p.git] / apt_dht / AptPackages.py
1 # Disable the FutureWarning from the apt module
2 import warnings
3 warnings.simplefilter("ignore", FutureWarning)
4
5 import os, shelve
6 from random import choice
7 from shutil import rmtree
8 from copy import deepcopy
9 from UserDict import DictMixin
10
11 from twisted.internet import threads, defer
12 from twisted.python import log
13 from twisted.python.filepath import FilePath
14 from twisted.trial import unittest
15
16 import apt_pkg, apt_inst
17 from apt import OpProgress
18 from debian_bundle import deb822
19
20 from Hash import HashObject
21
# Initialise the apt_pkg library once, when this module is imported.
apt_pkg.init()

# Lower-cased base names of the index files that PackageFileList.update_file
# registers in its database; files with any other name are ignored.
TRACKED_FILES = ['release', 'sources', 'packages']
25
class PackageFileList(DictMixin):
    """Manages a list of package files belonging to a backend.

    Maps the cache path of every tracked index file to the file object it
    was registered with. The mapping is persisted in a shelve database
    named C{packages.db} inside the cache directory.

    @ivar cache_dir: the directory holding the database (used through the
        C{restat}, C{exists}, C{makedirs} and C{child} methods)
    @type packages: C{shelve dictionary}
    @ivar packages: the files stored for this backend, or C{None} while the
        database is closed
    """

    def __init__(self, cache_dir):
        """Create the cache directory if needed and open the database.

        @param cache_dir: the directory to store the database in
        """
        self.cache_dir = cache_dir
        self.cache_dir.restat(False)
        if not self.cache_dir.exists():
            self.cache_dir.makedirs()
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files in this backend.

        Does nothing if the dictionary is already open.
        """
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir.child('packages.db').path)

    def close(self):
        """Close the persistent dictionary.

        Safe to call more than once. The reference is dropped afterwards so
        that a later call to L{open} really reopens the database.
        """
        if self.packages is not None:
            self.packages.close()
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @param cache_path: the path of the file within the mirror, only its
            last component decides whether the file is tracked
        @param file_path: the object to store for the file
        @return: True if the file was registered, False if it was ignored
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() not in TRACKED_FILES:
            return False

        log.msg("Registering package file: "+cache_path)
        self.packages[cache_path] = file_path
        return True

    def check_files(self):
        """Check all files in the database to make sure they exist.

        Entries whose file has disappeared are removed from the database.
        """
        # Iterate over a snapshot of the keys, entries are deleted on the way.
        for f in list(self.packages.keys()):
            # A shelf unpickles a new object on every access, so fetch the
            # entry once and refresh/check that same object.
            tracked = self.packages[f]
            tracked.restat(False)
            if not tracked.exists():
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    # Standard dictionary implementation so this class can be used like a dictionary.
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
78
class AptPackages:
    """Uses python-apt to answer queries about packages.

    Makes a fake configuration for python-apt for each backend.

    @ivar cache_dir: the directory holding the fake apt tree for this backend
    @ivar apt_config: the apt configuration applied before loading the cache
    @ivar packages: the L{PackageFileList} of tracked index files
    @ivar loaded: 1 while the apt cache is loaded, 0 otherwise
    @ivar loading: the deferred of a load in progress, or C{None}
    @ivar indexrecords: hashes read from the tracked Release files, keyed by
        the Release file's cache path, then index file name, then hash type
    """

    DEFAULT_APT_CONFIG = {
        #'APT' : '',
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir':'.', # /
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        # NOTE(review): the leading '.' does not match the 'apt/cache'
        # directory created from essential_dirs -- confirm it is intended.
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        'Dir::Bin' : '',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg' : '',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        Creates the directories and empty files the fake apt tree needs.

        @param cache_dir: cache directory from config file
        """
        self.cache_dir = cache_dir
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)
        # Set the state flags first so cleanup() works on any instance that
        # got as far as creating its package file list.
        self.loaded = 0
        self.loading = None

        for dirname in self.essential_dirs:
            path = self.cache_dir.preauthChild(dirname)
            if not path.exists():
                path.makedirs()
        for filename in self.essential_files:
            path = self.cache_dir.preauthChild(filename)
            if not path.exists():
                path.touch()

        self.apt_config['Dir'] = self.cache_dir.path
        self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path
        self.packages = PackageFileList(cache_dir)

    def __del__(self):
        """Release the caches and the file list when garbage collected."""
        # cleanup() already closes the file list, so it is not closed again
        # here. Skip everything if __init__ failed before creating it.
        if getattr(self, 'packages', None) is not None:
            self.cleanup()

    def _listsDir(self):
        """Return the directory apt reads its index lists from."""
        return self.cache_dir.preauthChild(self.apt_config['Dir::State']
                     ).preauthChild(self.apt_config['Dir::State::Lists'])

    def addRelease(self, cache_path, file_path):
        """Dirty hack until python-apt supports apt-pkg/indexrecords.h
        (see Bug #456141)

        Reads the MD5Sum, SHA1 and SHA256 sections of a Release file and
        stores, for every index file listed, a (hash, size) tuple under the
        upper-cased hash type in C{self.indexrecords[cache_path]}.

        @param cache_path: the cache path of the Release file
        @param file_path: the Release file, must provide C{open}
        """
        records = self.indexrecords[cache_path] = {}

        f = file_path.open('r')
        try:
            rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
            for hash_type in rel:
                for entry in rel[hash_type]:
                    records.setdefault(entry['name'], {})[hash_type.upper()] = (entry[hash_type], entry['size'])
        finally:
            # Close the file even if parsing the Release file fails.
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the backend has changed, manage it.

        If this affects us, unload our apt database
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package is initialized and loaded.

        @return: a deferred fired with the result of L{_load}; calls made
            while a load is in progress share the same deferred
        """
        if self.loading is None:
            self.loading = threads.deferToThread(self._load)
            # Use addBoth so a failed load is forgotten as well, otherwise
            # the failed deferred would be reused forever and the load
            # would never be retried.
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """The load has finished, successfully or not.

        @param loadResult: the result of L{_load}, or the failure it raised
        """
        self.loading = None
        # Must pass on the result (or failure) for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and load the packages cache.

        Rewrites sources.list with one line per tracked file, symlinks each
        tracked file into the lists directory and then opens the apt cache.

        @return: True if the cache is loaded, False if there are no tracked
            files to load
        """
        if self.loaded:
            return True

        apt_pkg.InitSystem()

        # Start again from an empty lists directory.
        lists_dir = self._listsDir()
        if lists_dir.exists():
            lists_dir.remove()
        lists_dir.child('partial').makedirs()

        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
                               ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
        sources_count = 0
        deb_src_added = False
        sources = sources_file.open('w')
        try:
            self.packages.check_files()
            self.indexrecords = {}
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                index_file = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, index_file)
                fake_uri = 'http://apt-dht' + f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line = 'deb-src ' + fake_dirname + '/ /'
                else:
                    source_line = 'deb ' + fake_dirname + '/ /'
                listpath = lists_dir.child(apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line + '\n')
                log.msg("Sources line: " + source_line)
                sources_count = sources_count + 1

                if listpath.exists():
                    #we should empty the directory instead
                    listpath.remove()
                os.symlink(index_file.path, listpath.path)
        finally:
            # Never leave sources.list open, even if a file could not be added.
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
            return False

        log.msg("Loading Packages database for "+self.cache_dir.path)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = 1
        return True

    def unload(self):
        """Tries to make the packages server quit."""
        if self.loaded:
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = 0

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        Returns a deferred so it can make sure the cache is loaded first.

        @param path: the path of the file within the mirror
        @return: a deferred fired with a C{HashObject}, which is a freshly
            created one if the path is not found or an error occurs
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred while trying to find a hash.

        Logs the failure and answers the lookup with a new C{HashObject}.

        @return: False, so lookups queued on the same load are told that
            the cache is not loaded
        """
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        d.callback(HashObject())
        return False

    def _findHash(self, loadResult, path, d):
        """Really find the hash for a path.

        Have to pass the returned loadResult on in case other calls to this
        function are pending.

        @param loadResult: the result of the load, false if nothing is loaded
        @param path: the path of the file within the mirror
        @param d: the deferred to fire with the C{HashObject} found
        """
        if not loadResult:
            d.callback(HashObject())
            return loadResult

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # Only Release files are recorded, strip that name to get the
            # directory the index file names are relative to.
            release_dir = release[:-len("Release")]
            if path.startswith(release_dir):
                for indexFile in self.indexrecords[release]:
                    if release_dir + indexFile == path:
                        h = HashObject()
                        h.setFromIndexRecord(self.indexrecords[release][indexFile])
                        d.callback(h)
                        return loadResult

        # The package name is the file name up to the first underscore
        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            h = HashObject()
                            h.setFromPkgRecord(self.records, size)
                            d.callback(h)
                            return loadResult
        except KeyError:
            # Not a known binary package, it may still be a source file
            pass

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.Restart()
            if self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        h = HashObject()
                        h.setFromSrcRecord(f)
                        d.callback(h)
                        return loadResult

        # Nothing matched, answer with a new hash object
        d.callback(HashObject())
        return loadResult
327
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests read the index files of the machine's own apt installation
    and compare what L{AptPackages} finds with what grep extracts from the
    same files, using the dpkg package as the sample.
    """

    pending_calls = []
    client = None
    timeout = 10
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    # Where the system's apt keeps its downloaded index files
    listsDir = '/var/lib/apt/lists/'
    # Index files looked up in the Release file.
    # NOTE(review): the binary index is specific to the i386 architecture.
    binaryIndex = 'main/binary-i386/Packages.bz2'
    sourceIndex = 'main/source/Sources.bz2'

    def _run(self, command):
        """Run a shell command and return everything it wrote to stdout."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            pipe.close()

    def _largestList(self, suffix):
        """Return the name of the largest main list file ending in suffix."""
        return self._run('ls -Sr ' + self.listsDir +
                         ' | grep -E "_main_.*' + suffix + '$" | tail -n 1').rstrip('\n')

    def _cachePath(self, listName):
        """Convert the name of a list file to the cache path it is served at."""
        return listName[listName.find('_dists_'):].replace('_','/')

    def _dpkgField(self, listFile, field):
        """Return the first value of a field in the dpkg stanza of a list file."""
        return self._run('grep -A 30 -E "^Package: dpkg$" ' +
                         self.listsDir + listFile +
                         ' | grep -E "^' + field + ':" | head -n 1' +
                         ' | cut -d\\  -f 2').rstrip('\n')

    def _dpkgSourceFiles(self, column):
        """Return one column of the Files section of the dpkg source stanza."""
        return self._run('grep -A 20 -E "^Package: dpkg$" ' +
                         self.listsDir + self.sourcesFile +
                         ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                         ' | cut -d\\  -f ' + str(column)).split('\n')[:-1]

    def _indexHash(self, indexName):
        """Return the SHA1 hash the Release file lists for an index file."""
        return self._run('grep -A 3000 -E "^SHA1:" ' +
                         self.listsDir + self.releaseFile +
                         ' | grep -E " ' + indexName + '$"' +
                         ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')

    def _indexPath(self, indexName):
        """Return the mirror path of an index file listed in the Release file."""
        release_path = self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')
        # Strip the trailing "Release" to get the directory of the index files
        return '/' + release_path[:-7] + indexName

    def _checkFindHash(self, path, true_hash):
        """Look up a path and verify the hash found, returns the deferred."""
        d = self.client.findHash(path)
        d.addCallback(self.verifyHash, path, true_hash)
        return d

    def _firstFailure(self, results):
        """Return the first failure of a DeferredList result, if any.

        Returning the failure makes the test fail with the original error.
        """
        for success, result in results:
            if not success:
                return result

    def setUp(self):
        self.pending_calls = []
        self.client = AptPackages(FilePath('/tmp/.apt-dht'))

        self.packagesFile = self._largestList('Packages')
        self.sourcesFile = self._largestList('Sources')
        # Use the Release file belonging to the chosen Packages file
        for f in os.walk(self.listsDir).next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        for listName in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listName),
                                     FilePath(self.listsDir + listName))

    def test_pkg_hash(self):
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._dpkgSourceFiles(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)][self.binaryIndex]['SHA1'][0]

        idx_hash = self._indexHash(self.binaryIndex)

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findIndexHash(self):
        return self._checkFindHash(self._indexPath(self.binaryIndex),
                                   self._indexHash(self.binaryIndex))

    def test_findPkgHash(self):
        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')

        return self._checkFindHash(pkg_path, pkg_hash)

    def test_findSrcHash(self):
        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceFiles(2)
        src_paths = self._dpkgSourceFiles(4)

        # Check one randomly chosen file of the source package
        i = choice(range(len(src_hashes)))
        return self._checkFindHash(src_dir + '/' + src_paths[i], src_hashes[i])

    def test_multipleFindHash(self):
        lookups = []

        lookups.append(self._checkFindHash(self._indexPath(self.binaryIndex),
                                           self._indexHash(self.binaryIndex)))

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')
        lookups.append(self._checkFindHash(pkg_path, pkg_hash))

        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceFiles(2)
        src_paths = self._dpkgSourceFiles(4)
        for i in range(len(src_hashes)):
            lookups.append(self._checkFindHash(src_dir + '/' + src_paths[i], src_hashes[i]))

        lookups.append(self._checkFindHash(self._indexPath(self.sourceIndex),
                                           self._indexHash(self.sourceIndex)))

        # Wait for every lookup, not only the last one, so that a failed
        # verification of any of them fails the test.
        all_done = defer.DeferredList(lookups, consumeErrors=True)
        all_done.addCallback(self._firstFailure)
        return all_done

    def tearDown(self):
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None