Fix an error that occurred when there were no Sources in AptPackages.
[quix0rs-apt-p2p.git] / apt_dht / AptPackages.py
1 # Disable the FutureWarning from the apt module
2 import warnings
3 warnings.simplefilter("ignore", FutureWarning)
4
5 import os, shelve
6 from random import choice
7 from shutil import rmtree
8 from copy import deepcopy
9 from UserDict import DictMixin
10
11 from twisted.internet import threads, defer
12 from twisted.python import log
13 from twisted.trial import unittest
14
15 import apt_pkg, apt_inst
16 from apt import OpProgress
17
18 apt_pkg.init()
19
class PackageFileList(DictMixin):
    """Manages a list of package files belonging to a backend.

    The list is a persistent C{shelve} dictionary stored as C{packages.db}
    in the cache directory. It maps the cache path of each tracked index
    file to the path of the file on disk.

    @type cache_dir: C{string}
    @ivar cache_dir: the directory holding the persistent dictionary
    @type packages: C{shelve dictionary}
    @ivar packages: the files stored for this backend, or None while closed
    """

    # Names (last path component) of the index files that are tracked.
    _TRACKED_NAMES = ("Packages", "Release", "Sources")

    def __init__(self, cache_dir):
        """Create the cache directory if needed and open the dictionary.

        @param cache_dir: the directory in which to keep C{packages.db}
        """
        self.cache_dir = cache_dir
        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir)
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files in this backend.

        Does nothing if the dictionary is already open.
        """
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir+'/packages.db')

    def close(self):
        """Close the persistent dictionary.

        Safe to call more than once; L{open} can be used to reopen it.
        """
        if self.packages is not None:
            self.packages.close()
            # Forget the closed shelf, otherwise open() would see a non-None
            # value and never reopen the database.
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @param cache_path: the path of the file within the mirror
        @param file_path: the location of the file on disk
        @return: True if the file is an index file and was recorded,
            False otherwise
        """
        filename = cache_path.split('/')[-1]
        if filename in self._TRACKED_NAMES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to make sure they exist.

        Entries whose file is no longer on disk are removed.
        """
        # Iterate over a snapshot of the keys, entries are deleted in the loop.
        for f in list(self.packages.keys()):
            if not os.path.exists(self.packages[f]):
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    # Standard dictionary implementation so this class can be used like a dictionary.
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
70
class AptPackages:
    """Uses python-apt to answer queries about packages.

    Makes a fake configuration for python-apt for each backend.

    @ivar cache_dir: the directory holding the fake apt tree
    @ivar apt_config: the apt configuration used for this backend
    @ivar packages: the L{PackageFileList} of tracked index files
    @ivar loaded: 1 once the apt caches have been loaded, otherwise 0
    @ivar loading: the Deferred of a load in progress, or None
    @ivar indexrecords: the hashes read from Release files, keyed by the
        cache path of the Release file, then by index file name, then by
        hash type; each value is a (hash, size) tuple of strings
    @ivar cache: the apt package cache, or None when not loaded
    @ivar records: the apt package records, or None when not loaded
    @ivar srcrecords: the apt source records, or None when not loaded or
        when no Sources file is available
    """

    DEFAULT_APT_CONFIG = {
        #'APT' : '',
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir':'.', # /
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        # NOTE(review): essential_dirs creates 'apt/cache', not '.apt/cache';
        # the leading dot looks like a typo -- confirm before changing.
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        'Dir::Bin' : '',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg' : '',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        Creates the directories and empty files python-apt needs below the
        cache directory and opens the list of tracked package files.

        @param cache_dir: cache directory from config file
        """
        # Set the state used by unload() and cleanup() first, so that
        # __del__ works even if the setup below raises.
        self.packages = None
        self.loaded = 0
        self.loading = None
        self.cache = None
        self.records = None
        self.srcrecords = None
        self.indexrecords = {}

        self.cache_dir = cache_dir
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        for dirname in self.essential_dirs:
            path = os.path.join(self.cache_dir, dirname)
            if not os.path.exists(path):
                os.makedirs(path)
        for filename in self.essential_files:
            path = os.path.join(self.cache_dir, filename)
            if not os.path.exists(path):
                # Only an empty file is needed.
                open(path, 'w').close()

        self.apt_config['Dir'] = self.cache_dir
        self.apt_config['Dir::State::status'] = os.path.join(self.cache_dir,
                      self.apt_config['Dir::State'], self.apt_config['Dir::State::status'])
        self.packages = PackageFileList(cache_dir)

    def __del__(self):
        """Release the caches and close the package list."""
        # cleanup() already closes the package list, don't close it twice.
        self.cleanup()

    def addRelease(self, cache_path, file_path):
        """Dirty hack until python-apt supports apt-pkg/indexrecords.h
        (see Bug #456141)

        Reads the MD5Sum, SHA1 and SHA256 sections of a Release file and
        stores the (hash, size) of every listed file in C{indexrecords}.

        @param cache_path: the path of the Release file within the mirror
        @param file_path: the location of the Release file on disk
        """
        records = {}
        self.indexrecords[cache_path] = records

        # The hash section currently being read, None outside of one
        hash_type = None
        f = open(file_path, 'r')
        try:
            for line in f:
                line = line.rstrip()

                if line[:1] != " ":
                    # Any header line ends the previous hash section
                    hash_type = None
                    try:
                        # Read the various headers from the file
                        h, _value = line.split(":", 1)
                    except ValueError:
                        # Bad header line, just ignore it
                        log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
                        continue
                    if h in ("MD5Sum", "SHA1", "SHA256"):
                        hash_type = h
                    continue

                # Read file names from the multiple hash sections of the file
                if hash_type is not None:
                    p = line.split()
                    if len(p) < 3:
                        # Need a hash, a size and a file name
                        log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
                        continue
                    records.setdefault(p[2], {})[hash_type] = (p[0], p[1])
        finally:
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the backend has changed, manage it.

        If this affects us, unload our apt database

        @param cache_path: the path of the file within the mirror
        @param file_path: the location of the file on disk
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package is initialized and loaded.

        @return: a Deferred firing with the result of L{_load}; calls made
            while a load is in progress share the same Deferred
        """
        if self.loading is None:
            self.loading = threads.deferToThread(self._load)
            # Use addBoth so a failed load is also forgotten, otherwise
            # every later call would reuse the failed Deferred and the
            # load would never be retried.
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """Cache is loaded (or the load failed), allow a new load to start.

        @param loadResult: the result or failure of the load
        @return: C{loadResult}, unchanged
        """
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and load the packages cache.

        @return: True if the caches are loaded, False if there are no
            package files to load
        """
        if self.loaded: return True
        apt_pkg.InitSystem()

        # Start with an empty lists directory
        lists_dir = os.path.join(self.cache_dir, self.apt_config['Dir::State'],
                                 self.apt_config['Dir::State::Lists'])
        if os.path.exists(lists_dir):
            rmtree(lists_dir)
        os.makedirs(os.path.join(lists_dir, 'partial'))

        sources_filename = os.path.join(self.cache_dir, self.apt_config['Dir::Etc'],
                                        self.apt_config['Dir::Etc::sourcelist'])
        sources_count = 0
        deb_src_added = False
        self.packages.check_files()
        self.indexrecords = {}

        sources = open(sources_filename, 'w')
        try:
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                filepath = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, filepath)
                fake_uri = 'http://apt-dht'+f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line = 'deb-src '+fake_dirname+'/ /'
                else:
                    source_line = 'deb '+fake_dirname+'/ /'
                listpath = os.path.join(lists_dir, apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line+'\n')
                log.msg("Sources line: " + source_line)
                sources_count = sources_count + 1

                try:
                    #we should empty the directory instead
                    os.unlink(listpath)
                except OSError:
                    # Usually just means there was no old link to remove
                    pass
                os.symlink(filepath, listpath)
        finally:
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir))
            return False

        log.msg("Loading Packages database for "+self.cache_dir)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            # Without a Sources file there are no source records to read
            self.srcrecords = None

        self.loaded = 1
        return True

    def unload(self):
        """Tries to make the packages server quit."""
        if self.loaded:
            # Drop the references so python-apt can free the caches
            self.cache = None
            self.records = None
            self.srcrecords = None
            self.indexrecords = {}
            self.loaded = 0

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        if self.packages is not None:
            self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        Returns a deferred so it can make sure the cache is loaded first.

        @param path: the path of the file within the mirror
        @return: a Deferred firing with a (hash, size) tuple, which is
            (None, None) if the file is not known
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred while trying to find a hash.

        @param failure: the error that occurred
        @param path: the path that was being looked up
        @param d: the Deferred to fire with the (empty) result
        @return: whether the caches are loaded, as the load result for any
            other pending lookups
        """
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        d.callback((None, None))
        # Other lookups may be waiting on the same load. Returning None
        # here would make all of them report that nothing was loaded, even
        # if only this one lookup went wrong.
        return bool(self.loaded)

    def _findHash(self, loadResult, path, d):
        """Really find the hash for a path.

        Have to pass the returned loadResult on in case other calls to this
        function are pending.

        @param loadResult: True if the caches were loaded
        @param path: the path of the file within the mirror
        @param d: the Deferred to fire with the (hash, size) found, or with
            (None, None) if the path is not known
        @return: C{loadResult}, unchanged
        """
        if not loadResult:
            d.callback((None, None))
            return loadResult

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # Strip the 'Release' file name to get the directory it describes
            release_dir = release[:-7]
            if path.startswith(release_dir):
                hashes = self.indexrecords[release].get(path[len(release_dir):])
                # The Release file may not have listed a SHA1 for this file
                if hashes is not None and 'SHA1' in hashes:
                    d.callback(hashes['SHA1'])
                    return loadResult

        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            d.callback((self.records.SHA1Hash, size))
                            return loadResult
        except KeyError:
            # Not the name of a binary package
            pass

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.Restart()
            if self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        d.callback((f[0], f[1]))
                        return loadResult

        d.callback((None, None))
        return loadResult
336
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The expected values are read with shell pipelines from the apt lists
    of the machine running the tests (in /var/lib/apt/lists/), and the
    'dpkg' package is used for the package lookups.
    """

    pending_calls = []
    client = None
    timeout = 10
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    # Where the machine's own apt lists are found
    listsDir = '/var/lib/apt/lists/'
    # The index files looked up in the Release file
    binaryIndex = 'main/binary-i386/Packages.bz2'
    sourceIndex = 'main/source/Sources.bz2'

    def setUp(self):
        """Register a Release, Packages and Sources file with a new client."""
        # Give every test its own list instead of sharing the class one
        self.pending_calls = []
        self.client = AptPackages('/tmp/.apt-dht')

        self.packagesFile = self._firstLine('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1')
        self.sourcesFile = self._firstLine('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1')
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        for listFile in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listFile), self.listsDir + listFile)

    def _firstLine(self, command):
        """Run a shell command and return its output without the final newline."""
        return os.popen(command).read().rstrip('\n')

    def _lines(self, command):
        """Run a shell command and return its output as a list of lines."""
        return os.popen(command).read().split('\n')[:-1]

    def _cachePath(self, listFile):
        """Convert the name of an apt list file to its path in the mirror."""
        return listFile[listFile.find('_dists_'):].replace('_','/')

    def _indexPath(self, indexFile):
        """Get the path requested for an index file of the Release file."""
        return '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + indexFile

    def _releaseHash(self, indexFile):
        """Read the SHA1 hash of an index file from the Release file."""
        return self._firstLine('grep -A 3000 -E "^SHA1:" ' +
                               self.listsDir + self.releaseFile +
                               ' | grep -E " ' + indexFile + '$"'
                               ' | head -n 1 | cut -d\\  -f 2')

    def _dpkgField(self, listFile, field):
        """Read a field of the dpkg entry in a Packages or Sources file."""
        return self._firstLine('grep -A 30 -E "^Package: dpkg$" ' +
                               self.listsDir + listFile +
                               ' | grep -E "^' + field + ':" | head -n 1' +
                               ' | cut -d\\  -f 2')

    def _dpkgSourceFiles(self, column):
        """Read one column of the Files section of the dpkg source entry.

        @param column: the field to cut from each line (2 holds the hashes
            and 4 the file names)
        """
        return self._lines('grep -A 20 -E "^Package: dpkg$" ' +
                           self.listsDir + self.sourcesFile +
                           ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                           ' | cut -d\\  -f ' + str(column))

    def _lookup(self, path, true_hash):
        """Start a lookup of a path and verify the hash that is found.

        @return: a Deferred that fails if the wrong hash is found
        """
        d = self.client.findHash(path)
        d.addCallback(self.verifyHash, path, true_hash)
        return d

    def test_pkg_hash(self):
        """The binary package records have the hash from the Packages file."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """The source records have the hashes from the Sources file."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._dpkgSourceFiles(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """The index records have the hash from the Release file."""
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._releaseHash(self.binaryIndex)

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Check the hash found for a path.

        @param found_hash: the (hash, size) tuple returned by findHash
        @param path: the path that was looked up
        @param true_hash: the hash that should have been found
        """
        self.failUnless(found_hash[0] == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash))

    def test_findIndexHash(self):
        """An index file can be found by its path."""
        return self._lookup(self._indexPath(self.binaryIndex),
                            self._releaseHash(self.binaryIndex))

    def test_findPkgHash(self):
        """A binary package can be found by its path."""
        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')

        return self._lookup(pkg_path, pkg_hash)

    def test_findSrcHash(self):
        """A randomly chosen source file can be found by its path."""
        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceFiles(2)
        src_paths = self._dpkgSourceFiles(4)

        i = choice(range(len(src_hashes)))
        return self._lookup(src_dir + '/' + src_paths[i], src_hashes[i])

    def test_multipleFindHash(self):
        """Many lookups can be pending on the same load of the cache."""
        lookups = []

        lookups.append(self._lookup(self._indexPath(self.binaryIndex),
                                    self._releaseHash(self.binaryIndex)))

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')
        lookups.append(self._lookup(pkg_path, pkg_hash))

        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceFiles(2)
        src_paths = self._dpkgSourceFiles(4)
        for i in range(len(src_hashes)):
            lookups.append(self._lookup(src_dir + '/' + src_paths[i], src_hashes[i]))

        lookups.append(self._lookup(self._indexPath(self.sourceIndex),
                                    self._releaseHash(self.sourceIndex)))

        # Wait for all the lookups, returning only the last one would hide
        # the failures of the others.
        return defer.gatherResults(lookups)

    def tearDown(self):
        """Cancel any calls still pending and close the client."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None