HTTPServer responds correctly to requests for torrent strings.
[quix0rs-apt-p2p.git] / apt_dht / AptPackages.py
1 #
2 # Copyright (C) 2002 Manuel Estrada Sainz <ranty@debian.org>
3 # Copyright (C) 2008 Cameron Dale <camrdale@gmail.com>
4 #
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of version 2.1 of the GNU General Public
7 # License as published by the Free Software Foundation.
8 #
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 # General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
18 # Disable the FutureWarning from the apt module
19 import warnings
20 warnings.simplefilter("ignore", FutureWarning)
21
22 import os, shelve
23 from random import choice
24 from shutil import rmtree
25 from copy import deepcopy
26 from UserDict import DictMixin
27
28 from twisted.internet import threads, defer, reactor
29 from twisted.python import log
30 from twisted.python.filepath import FilePath
31 from twisted.trial import unittest
32
33 import apt_pkg, apt_inst
34 from apt import OpProgress
35 from debian_bundle import deb822
36
37 from Hash import HashObject
38
# Initialise python-apt once, when this module is first imported.
# NOTE(review): presumably this loads apt's global configuration and system
# state before any cache is opened -- confirm against the python-apt docs.
apt_pkg.init()

# Lower-cased base names of the index files worth tracking.  The final
# component of an updated cache path is lower-cased and compared against
# this list in PackageFileList.update_file().
TRACKED_FILES = ['release', 'sources', 'packages']
42
class PackageFileList(DictMixin):
    """Manages a list of package files belonging to a backend.

    The list is a persistent (shelve) dictionary stored as 'packages.db' in
    the backend's cache directory.  Keys are cache paths; values are the
    file objects registered for them through L{update_file}.

    @ivar cache_dir: the directory holding the persistent dictionary
    @type packages: C{shelve dictionary}
    @ivar packages: the files stored for this backend, or C{None} while the
        dictionary is closed
    """

    def __init__(self, cache_dir):
        """Create the cache directory if needed and open the dictionary.

        @param cache_dir: the directory to store the dictionary in (must
            provide restat/exists/makedirs/child, as used below)
        """
        self.cache_dir = cache_dir
        # Refresh the cached stat info so exists() reflects the disk state.
        self.cache_dir.restat(False)
        if not self.cache_dir.exists():
            self.cache_dir.makedirs()
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files in this backend.

        Does nothing if the dictionary is already open.
        """
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir.child('packages.db').path)

    def close(self):
        """Close the persistent dictionary.

        Safe to call more than once; a later call to L{open} reopens it.
        """
        if self.packages is not None:
            self.packages.close()
            # Forget the closed shelf so that open() can reopen it and a
            # repeated close() is a no-op.
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @param cache_path: the path of the file within the mirror
        @param file_path: the file object to store for that path
        @return: C{True} if the file is one of the tracked index files and
            was registered, C{False} otherwise
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() in TRACKED_FILES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to make sure they exist.

        Entries whose file has disappeared are removed from the database.
        """
        # Snapshot the keys, as entries may be deleted while looping.
        for cache_path in list(self.packages.keys()):
            # Every shelve lookup unpickles a new object, so fetch the entry
            # once and refresh/test that same object.
            file_path = self.packages[cache_path]
            file_path.restat(False)
            if not file_path.exists():
                log.msg("File in packages database has been deleted: "+cache_path)
                del self.packages[cache_path]

    # Standard dictionary implementation so this class can be used like a dictionary.
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
95
class AptPackages:
    """Uses python-apt to answer queries about packages.

    Makes a fake configuration for python-apt for each backend.

    @ivar cache_dir: the directory holding the fake apt tree
    @ivar unload_delay: seconds of inactivity before the cache is unloaded
    @ivar apt_config: the apt configuration used for this backend
    @ivar packages: the L{PackageFileList} of tracked index files
    @ivar loaded: non-zero while the apt cache is loaded
    @ivar loading: the deferred for a load in progress, or C{None}
    @ivar unload_later: the pending delayed call to L{unload}, or C{None}
    """

    DEFAULT_APT_CONFIG = {
        #'APT' : '',
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir':'.', # /
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        # NOTE(review): this says '.apt/cache/' while essential_dirs below
        # creates 'apt/cache' -- possibly a typo; confirm before changing.
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        'Dir::Bin' : '',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg' : '',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir, unload_delay):
        """Construct a new packages manager.

        @param cache_dir: cache directory from config file
        @param unload_delay: seconds to wait after the last load request
            before the cache is unloaded again
        """
        self.cache_dir = cache_dir
        self.unload_delay = unload_delay
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        # Set the state attributes before touching the disk, so cleanup()
        # (run from __del__) is safe even if the setup below fails.
        self.packages = None
        self.loaded = 0
        self.loading = None
        self.unload_later = None

        for dirname in self.essential_dirs:
            path = self.cache_dir.preauthChild(dirname)
            if not path.exists():
                path.makedirs()
        for filename in self.essential_files:
            path = self.cache_dir.preauthChild(filename)
            if not path.exists():
                path.touch()

        self.apt_config['Dir'] = self.cache_dir.path
        self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path
        self.packages = PackageFileList(cache_dir)

    def __del__(self):
        """Unload the cache and close the file list on destruction."""
        # cleanup() already closes the package list; no second close needed.
        self.cleanup()

    def _listsDir(self):
        """Return the path object of the fake apt 'lists' directory."""
        return self.cache_dir.preauthChild(self.apt_config['Dir::State']
                            ).preauthChild(self.apt_config['Dir::State::Lists'])

    def addRelease(self, cache_path, file_path):
        """Dirty hack until python-apt supports apt-pkg/indexrecords.h
        (see Bug #456141)

        Parses a Release file and stores, under C{indexrecords[cache_path]},
        a mapping from each listed file name to a dictionary of
        C{{HASH_TYPE: (hash, size)}}.

        @param cache_path: the path of the Release file within the mirror
        @param file_path: the Release file to read (must provide C{open})
        """
        records = self.indexrecords[cache_path] = {}

        f = file_path.open('r')
        try:
            rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
            for hash_type in rel:
                for entry in rel[hash_type]:
                    records.setdefault(entry['name'], {})[hash_type.upper()] = (entry[hash_type], entry['size'])
        finally:
            # Always release the file handle, even if parsing fails.
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the backend has changed, manage it.

        If this affects us, unload our apt database
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package is initialized and loaded.

        Also (re)starts the timer that unloads the cache after
        C{unload_delay} seconds.

        @return: a deferred that fires with the result of L{_load}; callers
            share the same deferred while a load is in progress
        """
        if self.unload_later and self.unload_later.active():
            self.unload_later.reset(self.unload_delay)
        else:
            self.unload_later = reactor.callLater(self.unload_delay, self.unload)
        if self.loading is None:
            log.msg('Loading the packages cache')
            self.loading = threads.deferToThread(self._load)
            # Use addBoth: a failed load must also clear self.loading,
            # otherwise the failed deferred is reused and never retried.
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """Loading has finished (successfully or not).

        @param loadResult: the result of L{_load}, or the failure it raised
        @return: C{loadResult} unchanged, for the next callback/errback
        """
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and load the packages cache.

        Run in a thread by L{load}.

        @return: C{True} if the cache is loaded, C{False} if there were no
            index files to load
        """
        if self.loaded: return True
        apt_pkg.InitSystem()

        # Start from an empty lists directory.
        lists_dir = self._listsDir()
        if lists_dir.exists():
            lists_dir.remove()
        lists_dir.child('partial').makedirs()

        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
                               ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
        sources = sources_file.open('w')
        sources_count = 0
        deb_src_added = False
        try:
            self.packages.check_files()
            self.indexrecords = {}
            for cache_path in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                index_file = self.packages[cache_path]
                if cache_path.split('/')[-1] == "Release":
                    self.addRelease(cache_path, index_file)
                fake_uri = 'http://apt-dht' + cache_path
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if cache_path.endswith('Sources'):
                    deb_src_added = True
                    source_line = 'deb-src ' + fake_dirname + '/ /'
                else:
                    source_line = 'deb ' + fake_dirname + '/ /'
                listpath = lists_dir.child(apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line + '\n')
                log.msg("Sources line: " + source_line)
                sources_count = sources_count + 1

                if listpath.exists():
                    #we should empty the directory instead
                    listpath.remove()
                # Link the real index file under the name apt expects for
                # the fake URI.
                os.symlink(index_file.path, listpath.path)
        finally:
            # Close sources.list even if one of the files could not be added.
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
            return False

        log.msg("Loading Packages database for "+self.cache_dir.path)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = 1
        return True

    def unload(self):
        """Tries to make the packages server quit.

        Cancels any pending delayed unload and drops the loaded caches.
        """
        if self.unload_later and self.unload_later.active():
            self.unload_later.cancel()
        self.unload_later = None
        if self.loaded:
            log.msg('Unloading the packages cache')
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = 0

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        # packages is None if construction failed before it was created.
        if self.packages is not None:
            self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        Returns a deferred so it can make sure the cache is loaded first.

        @param path: the path of the file within the mirror
        @return: a deferred that fires with a L{HashObject} (an empty one
            if the path was not found or an error occurred)
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred while trying to find a hash.

        Logs the failure and answers the lookup with an empty hash.

        @return: whether the cache is loaded, so that other lookups pending
            on the same load deferred receive a usable load result
        """
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        d.callback(HashObject())
        # Returning None here would make every later pending lookup act as
        # if loading had failed, even when only this lookup went wrong.
        return bool(self.loaded)

    def _findHash(self, loadResult, path, d):
        """Really find the hash for a path.

        Have to pass the returned loadResult on in case other calls to this
        function are pending.

        @param loadResult: the result of loading the cache
        @param path: the path of the file within the mirror
        @param d: the deferred to fire with the resulting L{HashObject}
        @return: C{loadResult} unchanged
        """
        if not loadResult:
            d.callback(HashObject())
            return loadResult

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # Drop the trailing 7 characters ("Release") to get the
            # directory the index file names are relative to.
            release_dir = release[:-7]
            if path.startswith(release_dir):
                index_name = path[len(release_dir):]
                if index_name in self.indexrecords[release]:
                    h = HashObject()
                    h.setFromIndexRecord(self.indexrecords[release][index_name])
                    d.callback(h)
                    return loadResult

        # File names look like <package>_<version>..., so the package name
        # is everything before the first underscore.
        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            h = HashObject()
                            h.setFromPkgRecord(self.records, size)
                            d.callback(h)
                            return loadResult
        except KeyError:
            # Not a known binary package, fall through to the sources.
            pass

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.Restart()
            if self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        h = HashObject()
                        h.setFromSrcRecord(f)
                        d.callback(h)
                        return loadResult

        # Nothing matched: answer with an empty hash.
        d.callback(HashObject())
        return loadResult
355
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests read the index files of the local machine from
    /var/lib/apt/lists/ and compare the hashes found by AptPackages with
    the ones extracted from those files using shell pipelines.
    """

    pending_calls = []
    client = None
    timeout = 10
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    def _shell(self, command):
        """Run a shell pipeline and return everything it wrote to stdout."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            # Don't leave the pipe for the garbage collector to close.
            pipe.close()

    def _cachePath(self, listFile):
        """Convert a lists file name to the mirror path it came from."""
        return listFile[listFile.find('_dists_'):].replace('_','/')

    def _indexPath(self, indexFile):
        """Get the mirror path of an index file listed in the Release file."""
        return '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + indexFile

    def _indexHash(self, indexFile):
        """Get the SHA1 hash the Release file lists for an index file."""
        return self._shell('grep -A 3000 -E "^SHA1:" ' +
                           '/var/lib/apt/lists/' + self.releaseFile +
                           ' | grep -E " ' + indexFile + '$"' +
                           ' | head -n 1 | cut -d\\  -f 2').rstrip('\n')

    def _pkgField(self, field):
        """Get a field of the dpkg stanza in the Packages file."""
        return self._shell('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.packagesFile +
                           ' | grep -E "^' + field + ':" | head -n 1' +
                           ' | cut -d\\  -f 2').rstrip('\n')

    def _srcDir(self):
        """Get the mirror directory of the dpkg source package."""
        return '/' + self._shell('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -E "^Directory:" | head -n 1' +
                           ' | cut -d\\  -f 2').rstrip('\n')

    def _srcColumn(self, column):
        """Get one column of the Files entries of the dpkg source stanza."""
        return self._shell('grep -A 20 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                           ' | cut -d\\  -f ' + column).split('\n')[:-1]

    def setUp(self):
        """Create a client and register one Release/Packages/Sources set."""
        self.client = AptPackages(FilePath('/tmp/.apt-dht'), 300)

        # "ls -Sr | tail -n 1" picks the largest matching file.
        self.packagesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').rstrip('\n')
        self.sourcesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').rstrip('\n')
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        for listFile in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listFile),
                                     FilePath('/var/lib/apt/lists/' + listFile))

    def test_pkg_hash(self):
        """The loaded package records hold the SHA1 from the Packages file."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._pkgField('SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """The loaded source records hold hashes from the Sources file."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._srcColumn('2')

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """The index records hold the SHA1 listed in the Release file."""
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Fail the test unless the found hash is the expected one."""
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def _checkHash(self, path, true_hash):
        """Look up a path, returning a deferred that verifies its hash."""
        d = self.client.findHash(path)
        d.addCallback(self.verifyHash, path, true_hash)
        return d

    def test_findIndexHash(self):
        """findHash returns the right hash for an index file."""
        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        return self._checkHash(idx_path, idx_hash)

    def test_findPkgHash(self):
        """findHash returns the right hash for a binary package."""
        pkg_hash = self._pkgField('SHA1')
        pkg_path = '/' + self._pkgField('Filename')

        return self._checkHash(pkg_path, pkg_hash)

    def test_findSrcHash(self):
        """findHash returns the right hash for a random source file."""
        src_dir = self._srcDir()
        src_hashes = self._srcColumn('2')
        src_paths = self._srcColumn('4')

        i = choice(range(len(src_hashes)))
        return self._checkHash(src_dir + '/' + src_paths[i], src_hashes[i])

    def test_multipleFindHash(self):
        """Several findHash calls made before loading finishes all work."""
        lookups = []

        lookups.append(self._checkHash(self._indexPath('main/binary-i386/Packages.bz2'),
                                       self._indexHash('main/binary-i386/Packages.bz2')))

        lookups.append(self._checkHash('/' + self._pkgField('Filename'),
                                       self._pkgField('SHA1')))

        src_dir = self._srcDir()
        for src_path, src_hash in zip(self._srcColumn('4'), self._srcColumn('2')):
            lookups.append(self._checkHash(src_dir + '/' + src_path, src_hash))

        lookups.append(self._checkHash(self._indexPath('main/source/Sources.bz2'),
                                       self._indexHash('main/source/Sources.bz2')))

        # Wait for every lookup, not only the last one, so that a failed
        # verification of any of them fails the test.
        return defer.DeferredList(lookups, fireOnOneErrback=True, consumeErrors=True)

    def tearDown(self):
        """Cancel any pending calls and shut the client down."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None