2 # Copyright (C) 2002 Manuel Estrada Sainz <ranty@debian.org>
3 # Copyright (C) 2008 Cameron Dale <camrdale@gmail.com>
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of version 2.1 of the GNU General Public
7 # License as published by the Free Software Foundation.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # General Public License for more details.
14 # You should have received a copy of the GNU General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Manage a mirror's index files.

@type TRACKED_FILES: C{list} of C{string}
@var TRACKED_FILES: the file names of files that contain index information
"""
# Disable the FutureWarning from the apt module
import warnings
warnings.simplefilter("ignore", FutureWarning)

import os
import shelve
from random import choice
from shutil import rmtree
from copy import deepcopy
from UserDict import DictMixin

from twisted.internet import threads, defer, reactor
from twisted.python import log
from twisted.python.filepath import FilePath
from twisted.trial import unittest

import apt_pkg, apt_inst
from apt import OpProgress
from debian_bundle import deb822

from Hash import HashObject
# Lower-cased base names of the index files that get tracked; update_file()
# compares the lower-cased last path component of a file against this list.
TRACKED_FILES = ['release', 'sources', 'packages']
class PackageFileList(DictMixin):
    """Manages a list of index files belonging to a mirror.

    The list is kept in a shelve database (C{packages.db}) inside the cache
    directory. Keys are the paths of the files within the mirror, values are
    the locations of the files in the file system.

    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type packages: C{shelve dictionary}
    @ivar packages: the files tracked for this mirror, or C{None} while the
        dictionary is closed
    """

    def __init__(self, cache_dir):
        """Initialize the list by opening the dictionary.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: the directory to use for storing all files, it is
            created if it does not exist yet
        """
        self.cache_dir = cache_dir
        # Refresh the cached stat information before testing for existence
        self.cache_dir.restat(False)
        if not self.cache_dir.exists():
            self.cache_dir.makedirs()
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files for this mirror."""
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir.child('packages.db').path)

    def close(self):
        """Close the persistent dictionary.

        Closing an already closed list is a no-op.
        """
        if self.packages is not None:
            self.packages.close()
            # Forget the closed shelf so that open() can reopen it later
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @type cache_path: C{string}
        @param cache_path: the location of the file within the mirror
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: The location of the file in the file system
        @rtype: C{boolean}
        @return: whether the file is an index file
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() in TRACKED_FILES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to remove any that don't exist."""
        # Iterate over a snapshot of the keys, entries are deleted in the loop
        for f in list(self.packages.keys()):
            tracked = self.packages[f]
            tracked.restat(False)
            if not tracked.exists():
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    #{ Dictionary interface details
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
class AptPackages:
    """Answers queries about packages available from a mirror.

    Uses the python-apt tools to parse and provide information about the
    files that are available on a single mirror.

    @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt
    @ivar essential_dirs: directories that must be created for apt to work
    @ivar essential_files: files that must be created for apt to work
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type unload_delay: C{int}
    @ivar unload_delay: the time to wait before unloading the apt cache
    @ivar apt_config: the configuration parameters to use for apt
    @type packages: L{PackageFileList}
    @ivar packages: the persistent storage of tracked apt index files
    @type loaded: C{boolean}
    @ivar loaded: whether the apt cache is currently loaded
    @type loading: L{twisted.internet.defer.Deferred}
    @ivar loading: if the cache is currently being loaded, this will be
        called when it is loaded, otherwise it is None
    @type unload_later: L{twisted.internet.interfaces.IDelayedCall}
    @ivar unload_later: the delayed call to unload the apt cache
    @type indexrecords: C{dictionary}
    @ivar indexrecords: the hashes of index files for the mirror, keys are
        mirror directories, values are dictionaries with keys the path to the
        index file in the mirror directory and values are dictionaries with
        keys the hash type and values the hash
    @type cache: C{apt_pkg.GetCache()}
    @ivar cache: the apt cache of the mirror
    @type records: C{apt_pkg.GetPkgRecords()}
    @ivar records: the apt package records for all binary packages in a mirror
    @type srcrecords: C{apt_pkg.GetPkgSrcRecords}
    @ivar srcrecords: the apt package records for all source packages in a mirror
    """

    # All relative paths are resolved by apt against 'Dir', which __init__
    # sets to the cache directory of the instance.
    DEFAULT_APT_CONFIG = {
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        # NOTE(review): essential_dirs creates 'apt/cache', not '.apt/cache';
        # confirm whether the leading dot is intentional.
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir, unload_delay):
        """Construct a new packages manager.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: directory to use to store files for this mirror
        @type unload_delay: C{int}
        @param unload_delay: the time to wait before unloading the apt cache
        """
        self.cache_dir = cache_dir
        self.unload_delay = unload_delay
        # Private copy, the class-level defaults must stay untouched
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        # Create the necessary files and directories for apt
        for dir_name in self.essential_dirs:
            path = self.cache_dir.preauthChild(dir_name)
            if not path.exists():
                path.makedirs()
        for file_name in self.essential_files:
            path = self.cache_dir.preauthChild(file_name)
            if not path.exists():
                path.touch()

        self.apt_config['Dir'] = self.cache_dir.path
        # The status file is handed to apt as an absolute path
        self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(
            self.apt_config['Dir::State']).preauthChild(
            self.apt_config['Dir::State::status']).path
        self.packages = PackageFileList(cache_dir)
        self.loaded = False
        self.loading = None
        self.unload_later = None

    def __del__(self):
        """Release the apt cache and close the list of tracked files."""
        self.cleanup()
        self.packages.close()

    def addRelease(self, cache_path, file_path):
        """Add a Release file's info to the list of index files.

        Dirty hack until python-apt supports apt-pkg/indexrecords.h

        @type cache_path: C{string}
        @param cache_path: the location of the Release file within the mirror
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: the location of the Release file in the file system
        """
        records = self.indexrecords[cache_path] = {}

        f = file_path.open('r')
        try:
            # Use python-debian routines to parse the file for hashes
            rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
            for hash_type in rel:
                for entry in rel[hash_type]:
                    records.setdefault(entry['name'], {})[hash_type.upper()] = (entry[hash_type], entry['size'])
        finally:
            # Don't leak the file handle if the Release file can't be parsed
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the mirror has changed or been added.

        If this affects us, unload our apt database.
        @see: L{PackageFileList.update_file}
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package cache is initialized and loaded.

        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that fires with the result of L{_load}
        """
        # Reset the pending unload call
        if self.unload_later and self.unload_later.active():
            self.unload_later.reset(self.unload_delay)
        else:
            self.unload_later = reactor.callLater(self.unload_delay, self.unload)

        # Make sure it's not already being loaded
        if self.loading is None:
            log.msg('Loading the packages cache')
            self.loading = threads.deferToThread(self._load)
            # Also runs on failure, otherwise a failed deferred would be
            # handed out forever and the cache could never be loaded again
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """Cache is loaded (or loading failed), allow a new load to start."""
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and loads the packages caches.

        Called in a thread by L{load}.

        @rtype: C{boolean}
        @return: whether the apt cache is loaded
        """
        if self.loaded: return True

        # Modify the default configuration to create the fake one.
        # NOTE(review): assumes apt's configuration was already initialized
        # when the module was imported -- confirm.
        apt_pkg.InitSystem()
        lists_dir = self.cache_dir.preauthChild(self.apt_config['Dir::State']
                         ).preauthChild(self.apt_config['Dir::State::Lists'])
        # Start from an empty lists directory, it is refilled with links below
        lists_dir.remove()
        lists_dir.child('partial').makedirs()
        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
                               ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
        sources = sources_file.open('w')
        sources_count = 0
        deb_src_added = False
        try:
            self.packages.check_files()
            self.indexrecords = {}

            # Create an entry in sources.list for each needed index file
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                index_file = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, index_file)
                fake_uri='http://apt-dht'+f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line='deb-src '+fake_dirname+'/ /'
                else:
                    source_line='deb '+fake_dirname+'/ /'
                listpath = lists_dir.child(apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line+'\n')
                log.msg("Sources line: " + source_line)
                sources_count = sources_count + 1

                if listpath.exists():
                    #we should empty the directory instead
                    listpath.remove()
                # apt reads the index through a link named after the fake URI
                os.symlink(index_file.path, listpath.path)
        finally:
            # Don't leak the sources.list handle if anything above fails
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
            return False

        log.msg("Loading Packages database for "+self.cache_dir.path)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = True
        return True

    def unload(self):
        """Tries to make the packages server quit."""
        if self.unload_later and self.unload_later.active():
            self.unload_later.cancel()
        self.unload_later = None
        if self.loaded:
            log.msg('Unloading the packages cache')
            # This should save memory
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = False

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred so it can make sure the cache is loaded first
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred, return an empty hash."""
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        d.callback(HashObject())
        # Pass the failure on, other lookups may be waiting on the same load
        return failure

    def _findHash(self, loadResult, path, d):
        """Search the records for the hash of a path.

        @type loadResult: C{boolean}
        @param loadResult: whether apt's cache was successfully loaded
        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @type d: L{twisted.internet.defer.Deferred}
        @param d: the deferred to callback with the result
        @return: the unchanged C{loadResult}
        """
        if loadResult:
            h = self._lookupHash(path)
        else:
            h = HashObject()
        d.callback(h)

        # Have to pass the returned loadResult on in case other calls to this function are pending.
        return loadResult

    def _lookupHash(self, path):
        """Build the hash object for a path from the loaded records.

        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @return: the hash object, left empty if the path is not known
        """
        h = HashObject()

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # Strip the trailing 'Release' to get the mirror directory
            release_dir = release[:-7]
            if path.startswith(release_dir):
                indexFile = path[len(release_dir):]
                if indexFile in self.indexrecords[release]:
                    h.setFromIndexRecord(self.indexrecords[release][indexFile])
                    return h

        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            h.setFromPkgRecord(self.records, size)
                            return h
        except KeyError:
            # Not a known binary package, it may still be a source file
            pass

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.Restart()
            if self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        h.setFromSrcRecord(f)
                        return h

        return h
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests read the index files found in /var/lib/apt/lists, so they need
    a system with downloaded apt lists containing a 'dpkg' package.
    NOTE(review): the index lookups are hard-coded to 'binary-i386'.
    """

    pending_calls = []
    client = None
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    def _shell(self, command):
        """Run a shell pipeline and return everything it printed."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            pipe.close()

    def _mirrorPath(self, listFile):
        """Convert the name of a file in apt's lists to its path in the mirror."""
        return listFile[listFile.find('_dists_'):].replace('_','/')

    def _indexPath(self, indexName):
        """Get the mirror path of an index file listed in the Release file."""
        return '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + indexName

    def _indexHash(self, indexName):
        """Get the first SHA1 hash of an index file from the Release file."""
        return self._shell('grep -A 3000 -E "^SHA1:" ' +
                           '/var/lib/apt/lists/' + self.releaseFile +
                           ' | grep -E " ' + indexName + '$"' +
                           ' | head -n 1 | cut -d\\ -f 2').rstrip('\n')

    def _pkgField(self, field):
        """Get a field of the first 'dpkg' stanza in the Packages file."""
        return self._shell('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.packagesFile +
                           ' | grep -E "^' + field + ':" | head -n 1' +
                           ' | cut -d\\ -f 2').rstrip('\n')

    def _srcDirectory(self):
        """Get the mirror directory of the 'dpkg' source package."""
        return '/' + self._shell('grep -A 30 -E "^Package: dpkg$" ' +
                                 '/var/lib/apt/lists/' + self.sourcesFile +
                                 ' | grep -E "^Directory:" | head -n 1' +
                                 ' | cut -d\\ -f 2').rstrip('\n')

    def _srcFilesColumn(self, column):
        """Get one column of the 'Files:' entries of the 'dpkg' source package.

        Column 2 holds the hashes and column 4 the file names.
        """
        return self._shell('grep -A 20 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                           ' | cut -d\\ -f ' + str(column)).split('\n')[:-1]

    def setUp(self):
        """Initializes the cache with files found in the traditional apt location."""
        self.client = AptPackages(FilePath('/tmp/.apt-dht'), 300)

        # Find the largest index files that are for 'main'
        self.packagesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').rstrip('\n')
        self.sourcesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').rstrip('\n')

        # Find the Release file corresponding to the found Packages file
        for f in os.walk('/var/lib/apt/lists').next()[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        # Add all the found files to the PackageFileList
        for listFile in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._mirrorPath(listFile),
                                     FilePath('/var/lib/apt/lists/' + listFile))

    def test_pkg_hash(self):
        """Tests loading the binary package records cache."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._pkgField('SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """Tests loading the source package records cache."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._srcFilesColumn(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """Tests loading the cache of index file information."""
        self.client._load()

        indexhash = self.client.indexrecords[self._mirrorPath(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Fail the test if the found hash is not the expected one."""
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findIndexHash(self):
        """Tests finding the hash of a single index file."""
        lastDefer = defer.Deferred()

        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_findPkgHash(self):
        """Tests finding the hash of a single binary package."""
        lastDefer = defer.Deferred()

        pkg_hash = self._pkgField('SHA1')
        pkg_path = '/' + self._pkgField('Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_findSrcHash(self):
        """Tests finding the hash of a single source package."""
        lastDefer = defer.Deferred()

        src_dir = self._srcDirectory()
        src_hashes = self._srcFilesColumn(2)
        src_paths = self._srcFilesColumn(4)

        # Only one randomly chosen file of the source package is checked
        i = choice(range(len(src_hashes)))
        d = self.client.findHash(src_dir + '/' + src_paths[i])
        d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])

        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_multipleFindHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        lastDefer = defer.Deferred()

        # Lookup a Packages.bz2 file
        idx_hash = self._indexHash('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Lookup the binary 'dpkg' package
        pkg_hash = self._pkgField('SHA1')
        pkg_path = '/' + self._pkgField('Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        # Lookup the source 'dpkg' package
        src_dir = self._srcDirectory()
        src_hashes = self._srcFilesColumn(2)
        src_paths = self._srcFilesColumn(4)

        for i in range(len(src_hashes)):
            d = self.client.findHash(src_dir + '/' + src_paths[i])
            d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])

        # Lookup a Sources.bz2 file
        idx_hash = self._indexHash('main/source/Sources.bz2')
        idx_path = self._indexPath('main/source/Sources.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Only the last lookup is chained to the deferred the test waits on
        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        """Cancel any pending delayed calls and release the cache."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None