Scanning cache directories on startup waits for DHT storeValue to return.
[quix0rs-apt-p2p.git] / apt_dht / AptPackages.py
# Disable the FutureWarning from the apt module.
# NOTE(review): the filter is installed before the apt_pkg/apt imports further
# down, presumably because the warning is raised while those modules are
# imported -- keep these lines above them.
import warnings
warnings.simplefilter("ignore", FutureWarning)
4
5 import os, shelve
6 from random import choice
7 from shutil import rmtree
8 from copy import deepcopy
9 from UserDict import DictMixin
10
11 from twisted.internet import threads, defer
12 from twisted.python import log
13 from twisted.python.filepath import FilePath
14 from twisted.trial import unittest
15
16 import apt_pkg, apt_inst
17 from apt import OpProgress
18
19 from Hash import HashObject
20
# Initialize the apt_pkg library once, when this module is imported.
apt_pkg.init()

# Lower-cased base names of the files that PackageFileList.update_file()
# records; the last component of a cache path is lower-cased and looked up here.
TRACKED_FILES = ['release', 'sources', 'packages']
24
class PackageFileList(DictMixin):
    """Manages a list of package files belonging to a backend.

    Maps cache paths to the file objects registered for them. The mapping is
    persisted in a shelve database named 'packages.db' inside the cache
    directory, and the class can be used like a dictionary of its entries.

    @ivar cache_dir: the directory holding the database (assumed to be a
        C{twisted.python.filepath.FilePath} -- TODO confirm against callers)
    @type packages: C{shelve dictionary}
    @ivar packages: the files stored for this backend, or None while the
        database is closed
    """

    def __init__(self, cache_dir):
        """Create the cache directory if it is missing and open the database.

        @param cache_dir: the directory in which to keep 'packages.db'
        """
        self.cache_dir = cache_dir
        # Refresh the stat information so exists() does not answer from a
        # stale cached value.
        self.cache_dir.restat(False)
        if not self.cache_dir.exists():
            self.cache_dir.makedirs()
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files in this backend.

        Does nothing if the dictionary is already open.
        """
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir.child('packages.db').path)

    def close(self):
        """Close the persistent dictionary.

        Safe to call more than once; the handle is dropped so that a later
        open() really reopens the database instead of keeping a closed one.
        """
        if self.packages is not None:
            self.packages.close()
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @param cache_path: the '/'-separated path of the file in the cache
        @param file_path: the object to store for that path
        @return: True if the file's name is one of TRACKED_FILES and it was
            recorded, False otherwise
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() in TRACKED_FILES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to make sure they exist.

        Entries whose file no longer exists are logged and removed.
        """
        # Iterate over a snapshot of the keys, entries are deleted on the way.
        for f in list(self.packages.keys()):
            # Every read from the shelf builds a new object, so fetch the
            # entry once: the refreshed stat must belong to the same object
            # that is then asked whether it exists.
            stored = self.packages[f]
            stored.restat(False)
            if not stored.exists():
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    # Standard dictionary implementation so this class can be used like a dictionary.
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
77
class AptPackages:
    """Uses python-apt to answer queries about packages.

    Makes a fake configuration for python-apt for each backend.

    @cvar DEFAULT_APT_CONFIG: the apt configuration values copied for every
        instance and then adjusted to point into its cache directory
    @cvar essential_dirs: directories created below the cache directory
    @cvar essential_files: files created (empty) below the cache directory
    @ivar cache_dir: cache directory from config file
    @ivar apt_config: this instance's copy of the apt configuration
    @ivar packages: the L{PackageFileList} of index files being tracked
    @ivar loaded: 1 while the apt caches are loaded, 0 otherwise
    @ivar loading: the deferred of a load in progress, or None
    @ivar indexrecords: built by _load(); maps the cache path of each Release
        file to a dictionary of index file name -> hash type -> the first two
        fields of the file's line in that hash section
    """

    DEFAULT_APT_CONFIG = {
        #'APT' : '',
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir':'.', # /
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        'Dir::Bin' : '',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg' : '',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        Creates the essential directories and files below the cache directory
        and opens the list of tracked files.

        @param cache_dir: cache directory from config file
        """
        # Set the state read by cleanup() first, so that __del__ still works
        # if anything below raises.
        self.packages = None
        self.loaded = 0
        self.loading = None

        self.cache_dir = cache_dir
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        for dirname in self.essential_dirs:
            path = self.cache_dir.preauthChild(dirname)
            if not path.exists():
                path.makedirs()
        for filename in self.essential_files:
            path = self.cache_dir.preauthChild(filename)
            if not path.exists():
                path.touch()

        # Point the fake configuration into this backend's cache directory.
        self.apt_config['Dir'] = self.cache_dir.path
        self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path
        self.packages = PackageFileList(cache_dir)

    def __del__(self):
        """Release the caches and the file database with the object."""
        # cleanup() already closes the file database, no second close needed.
        self.cleanup()

    def addRelease(self, cache_path, file_path):
        """Dirty hack until python-apt supports apt-pkg/indexrecords.h
        (see Bug #456141)

        Reads the MD5Sum, SHA1 and SHA256 sections of a Release file into
        self.indexrecords[cache_path]. Badly formatted lines are logged and
        skipped.

        @param cache_path: the cache path of the Release file, used as the key
        @param file_path: the file object to read, opened with open('r')
        """
        records = {}
        self.indexrecords[cache_path] = records

        # The hash section being read, or None outside of the hash sections.
        hash_type = None
        f = file_path.open('r')
        try:
            for line in f:
                line = line.rstrip()

                if line[:1] != " ":
                    # An unindented line is a header and ends any hash section.
                    hash_type = None
                    try:
                        h, v = line.split(":", 1)
                    except ValueError:
                        # Bad header line, just ignore it
                        log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
                    else:
                        if h in ("MD5Sum", "SHA1", "SHA256"):
                            hash_type = h
                    continue

                # Read file names from the multiple hash sections of the file
                if hash_type is not None:
                    p = line.split()
                    if len(p) < 3:
                        # Too few fields to hold two values and a file name.
                        log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
                        continue
                    records.setdefault(p[2], {})[hash_type] = (p[0], p[1])
        finally:
            # Close the file even if reading it failed.
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the backend has changed, manage it.

        If this affects us, unload our apt database

        @param cache_path: the cache path of the changed file
        @param file_path: the file object now stored for that path
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package is initialized and loaded.

        @return: a deferred that fires with the result of _load(); the same
            deferred is returned to every caller while a load is in progress
        """
        if self.loading is None:
            self.loading = threads.deferToThread(self._load)
            # Reset the state on failure as well, otherwise a single failed
            # load would be handed to every later caller and never retried.
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """The load has finished, allow a new one to be started.

        @param loadResult: the result of _load(), or the failure it produced
        @return: loadResult unchanged
        """
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and load the packages cache.

        Rewrites sources.list with one line per tracked file, links every
        tracked file into the lists directory and loads the apt caches.
        Run in a thread by load().

        @return: True if the caches are loaded, False if there are no tracked
            files to load
        """
        if self.loaded:
            return True
        apt_pkg.InitSystem()

        # Start over with an empty lists directory.
        lists_dir = self.cache_dir.preauthChild(self.apt_config['Dir::State']
                              ).preauthChild(self.apt_config['Dir::State::Lists'])
        if lists_dir.exists():
            lists_dir.remove()
        lists_dir.child('partial').makedirs()

        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
                               ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
        sources = sources_file.open('w')
        sources_count = 0
        deb_src_added = False
        try:
            self.packages.check_files()
            self.indexrecords = {}
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                index_file = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, index_file)
                fake_uri = 'http://apt-dht' + f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line = 'deb-src ' + fake_dirname + '/ /'
                else:
                    source_line = 'deb ' + fake_dirname + '/ /'
                listpath = lists_dir.child(apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line + '\n')
                log.msg("Sources line: " + source_line)
                sources_count += 1

                if listpath.exists():
                    #we should empty the directory instead
                    listpath.remove()
                # Link the real file under the name of the fake URI.
                os.symlink(index_file.path, listpath.path)
        finally:
            # Do not leave sources.list open if anything above failed.
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
            return False

        log.msg("Loading Packages database for "+self.cache_dir.path)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = 1
        return True

    def unload(self):
        """Tries to make the packages server quit.

        Drops the loaded caches so that the next load() rebuilds them.
        """
        if self.loaded:
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = 0

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        # packages is None if the constructor failed before opening it.
        if self.packages is not None:
            self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        Returns a deferred so it can make sure the cache is loaded first.

        @param path: the path of the file to look up
        @return: a deferred that fires with a HashObject, which is created
            without any hash information if the path could not be found
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred while trying to find a hash.

        Logs the error and answers the request with an empty HashObject.

        @return: whether the cache is loaded, which becomes the load result
            seen by the lookups still waiting on the same load
        """
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        d.callback(HashObject())
        # Returning nothing would make every pending lookup believe the load
        # failed, even when only this one lookup went wrong.
        return bool(self.loaded)

    def _findHash(self, loadResult, path, d):
        """Really find the hash for a path.

        Have to pass the returned loadResult on in case other calls to this
        function are pending.

        @param loadResult: the result of load(), false if nothing is loaded
        @param path: the path of the file to look up
        @param d: the deferred to fire with the HashObject
        @return: loadResult unchanged
        """
        if not loadResult:
            d.callback(HashObject())
            return loadResult

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # The keys end in 'Release', the index file names recorded for a
            # key are relative to the directory containing that file.
            release_dir = release[:-len('Release')]
            if path.startswith(release_dir):
                for indexFile in self.indexrecords[release]:
                    if release_dir + indexFile == path:
                        h = HashObject()
                        h.setFromIndexRecord(self.indexrecords[release][indexFile])
                        d.callback(h)
                        return loadResult

        # The package name is the file name up to the first underscore.
        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            h = HashObject()
                            h.setFromPkgRecord(self.records, size)
                            d.callback(h)
                            return loadResult
        except KeyError:
            # The package is not in the binary cache.
            pass

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.Restart()
            if self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        h = HashObject()
                        h.setFromSrcRecord(f)
                        d.callback(h)
                        return loadResult

        # Nothing matched, answer with an empty hash.
        d.callback(HashObject())
        return loadResult
345
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests read the apt lists of the machine they run on (below
    /var/lib/apt/lists/) with shell pipelines and compare what they find
    there with the answers of an AptPackages using '/tmp/.apt-dht'.
    """

    pending_calls = []
    client = None
    timeout = 10
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    def _run(self, command):
        """Run a shell pipeline and return everything it wrote to stdout."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            pipe.close()

    def _lastList(self, suffix):
        """Return the last '_main_' list file ending in suffix, as sorted by 'ls -Sr'."""
        return self._run('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*' +
                         suffix + '$" | tail -n 1').rstrip('\n')

    def _cachePath(self, listFile):
        """Turn the name of a list file into the cache path it is registered under."""
        return listFile[listFile.find('_dists_'):].replace('_','/')

    def _indexPath(self, indexFile):
        """Return the path of an index file listed in the chosen Release file."""
        release_path = self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')
        # Drop the trailing 'Release' to get the directory of the file.
        return '/' + release_path[:-7] + indexFile

    def _indexHash(self, indexFile):
        """Return the SHA1 hash the chosen Release file lists for an index file."""
        return self._run('grep -A 3000 -E "^SHA1:" ' +
                         '/var/lib/apt/lists/' + self.releaseFile +
                         ' | grep -E " ' + indexFile + '$"'
                         ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')

    def _dpkgField(self, listFile, field):
        """Return the value of the first such field after dpkg's entry in a list file."""
        return self._run('grep -A 30 -E "^Package: dpkg$" ' +
                         '/var/lib/apt/lists/' + listFile +
                         ' | grep -E "^' + field + ':" | head -n 1' +
                         ' | cut -d\\  -f 2').rstrip('\n')

    def _dpkgSourceColumn(self, column):
        """Return one space-separated column of the 'Files:' lines of dpkg's source entry."""
        return self._run('grep -A 20 -E "^Package: dpkg$" ' +
                         '/var/lib/apt/lists/' + self.sourcesFile +
                         ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                         ' | cut -d\\  -f %d' % column).split('\n')[:-1]

    def _checkHash(self, path, true_hash):
        """Look up a path and return a deferred that fails if its hash is wrong."""
        d = self.client.findHash(path)
        d.addCallback(self.verifyHash, path, true_hash)
        return d

    def setUp(self):
        """Pick a Packages, Sources and Release file and register them."""
        self.client = AptPackages(FilePath('/tmp/.apt-dht'))

        self.packagesFile = self._lastList('Packages')
        self.sourcesFile = self._lastList('Sources')
        # Use the Release file belonging to the chosen Packages file.
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        for listFile in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listFile),
                                     FilePath('/var/lib/apt/lists/' + listFile))

    def test_pkg_hash(self):
        """The loaded package records report the SHA1 hash of dpkg."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """The loaded source records report the hashes of dpkg's source files."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._dpkgSourceColumn(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """The index records hold the SHA1 hash of an index file."""
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Fail unless the hash found for a path is the expected one."""
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findIndexHash(self):
        """findHash() answers for an index file."""
        return self._checkHash(self._indexPath('main/binary-i386/Packages.bz2'),
                               self._indexHash('main/binary-i386/Packages.bz2'))

    def test_findPkgHash(self):
        """findHash() answers for a binary package."""
        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')

        return self._checkHash(pkg_path, pkg_hash)

    def test_findSrcHash(self):
        """findHash() answers for a randomly chosen source file of dpkg."""
        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceColumn(2)
        src_paths = self._dpkgSourceColumn(4)

        i = choice(range(len(src_hashes)))
        return self._checkHash(src_dir + '/' + src_paths[i], src_hashes[i])

    def test_multipleFindHash(self):
        """findHash() answers many requests made before the cache is loaded."""
        checks = []

        checks.append(self._checkHash(self._indexPath('main/binary-i386/Packages.bz2'),
                                      self._indexHash('main/binary-i386/Packages.bz2')))

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')
        checks.append(self._checkHash(pkg_path, pkg_hash))

        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceColumn(2)
        src_paths = self._dpkgSourceColumn(4)
        for src_path, src_hash in zip(src_paths, src_hashes):
            checks.append(self._checkHash(src_dir + '/' + src_path, src_hash))

        checks.append(self._checkHash(self._indexPath('main/source/Sources.bz2'),
                                      self._indexHash('main/source/Sources.bz2')))

        # Wait for every lookup, so a wrong hash in any of them fails the
        # test instead of only the last one being reported.
        return defer.DeferredList(checks, fireOnOneErrback=True, consumeErrors=True)

    def tearDown(self):
        """Cancel any pending calls and release the client."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None