2 # Copyright (C) 2002 Manuel Estrada Sainz <ranty@debian.org>
3 # Copyright (C) 2008 Cameron Dale <camrdale@gmail.com>
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of version 2.1 of the GNU General Public
7 # License as published by the Free Software Foundation.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # General Public License for more details.
14 # You should have received a copy of the GNU General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 """Manage a mirror's index files.
20 @type TRACKED_FILES: C{list} of C{string}
21 @var TRACKED_FILES: the file names of files that contain index information
24 # Disable the FutureWarning from the apt module
26 warnings.simplefilter("ignore", FutureWarning)
29 from random import choice
30 from shutil import rmtree
31 from copy import deepcopy
32 from UserDict import DictMixin
34 from twisted.internet import threads, defer, reactor
35 from twisted.python import log
36 from twisted.python.filepath import FilePath
37 from twisted.trial import unittest
39 import apt_pkg, apt_inst
40 from apt import OpProgress
41 from debian_bundle import deb822
43 from Hash import HashObject
47 TRACKED_FILES = ['release', 'sources', 'packages']
49 class PackageFileList(DictMixin):
50 """Manages a list of index files belonging to a mirror.
52 @type cache_dir: L{twisted.python.filepath.FilePath}
53 @ivar cache_dir: the directory to use for storing all files
54 @type packages: C{shelve dictionary}
55 @ivar packages: the files tracked for this mirror
58 def __init__(self, cache_dir):
59 """Initialize the list by opening the dictionary."""
60 self.cache_dir = cache_dir
61 self.cache_dir.restat(False)
62 if not self.cache_dir.exists():
63 self.cache_dir.makedirs()
68 """Open the persistent dictionary of files for this mirror."""
69 if self.packages is None:
70 self.packages = shelve.open(self.cache_dir.child('packages.db').path)
73 """Close the persistent dictionary."""
74 if self.packages is not None:
def update_file(self, cache_path, file_path):
    """Check if an updated file needs to be tracked.

    Called from the mirror manager when files get updated so we can update our
    fake lists and sources.list.

    @type cache_path: C{string}
    @param cache_path: the location of the file within the mirror
    @type file_path: L{twisted.python.filepath.FilePath}
    @param file_path: The location of the file in the file system
    @rtype: C{boolean}
    @return: whether the file is an index file
    """
    # Only the last path component decides whether this is an index file;
    # it is lower-cased before being compared against TRACKED_FILES.
    filename = cache_path.split('/')[-1]
    if filename.lower() not in TRACKED_FILES:
        return False

    log.msg("Registering package file: "+cache_path)
    self.packages[cache_path] = file_path
    return True
def check_files(self):
    """Check all files in the database to remove any that don't exist."""
    # Take a snapshot of the keys so entries can be deleted while looping.
    files = self.packages.keys()
    for f in files:
        # Fetch the stored path once so that restat() and exists() operate
        # on the same object rather than on two separate lookups.
        tracked = self.packages[f]
        tracked.restat(False)
        if not tracked.exists():
            log.msg("File in packages database has been deleted: "+f)
            del self.packages[f]
106 #{ Dictionary interface details
def __getitem__(self, key):
    """Return the entry stored in the packages dictionary under C{key}."""
    return self.packages[key]
def __setitem__(self, key, item):
    """Store C{item} in the packages dictionary under C{key}."""
    self.packages[key] = item
def __delitem__(self, key):
    """Remove the entry stored in the packages dictionary under C{key}."""
    del self.packages[key]
def keys(self):
    """Return the keys of the packages dictionary."""
    return self.packages.keys()
113 """Answers queries about packages available from a mirror.
115 Uses the python-apt tools to parse and provide information about the
116 files that are available on a single mirror.
118 @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt
119 @ivar essential_dirs: directories that must be created for apt to work
120 @ivar essential_files: files that must be created for apt to work
121 @type cache_dir: L{twisted.python.filepath.FilePath}
122 @ivar cache_dir: the directory to use for storing all files
123 @type unload_delay: C{int}
124 @ivar unload_delay: the time to wait before unloading the apt cache
125 @ivar apt_config: the configuration parameters to use for apt
126 @type packages: L{PackageFileList}
127 @ivar packages: the persistent storage of tracked apt index files
128 @type loaded: C{boolean}
129 @ivar loaded: whether the apt cache is currently loaded
130 @type loading: L{twisted.internet.defer.Deferred}
131 @ivar loading: if the cache is currently being loaded, this will be
132 called when it is loaded, otherwise it is None
133 @type unload_later: L{twisted.internet.interfaces.IDelayedCall}
134 @ivar unload_later: the delayed call to unload the apt cache
135 @type indexrecords: C{dictionary}
136 @ivar indexrecords: the hashes of index files for the mirror, keys are
137 mirror directories, values are dictionaries with keys the path to the
138 index file in the mirror directory and values are dictionaries with
139 keys the hash type and values the hash
140 @type cache: C{apt_pkg.GetCache()}
141 @ivar cache: the apt cache of the mirror
142 @type records: C{apt_pkg.GetPkgRecords()}
143 @ivar records: the apt package records for all binary packages in a mirror
144 @type srcrecords: C{apt_pkg.GetPkgSrcRecords}
145 @ivar srcrecords: the apt package records for all source packages in a mirror
148 DEFAULT_APT_CONFIG = {
150 #'APT::Architecture' : 'i386', # Commented so the machine's config will set this
151 #'APT::Default-Release' : 'unstable',
153 'Dir::State' : 'apt/', # var/lib/apt/
154 'Dir::State::Lists': 'lists/', # lists/
155 #'Dir::State::cdroms' : 'cdroms.list',
156 'Dir::State::userstatus' : 'status.user',
157 'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
158 'Dir::Cache' : '.apt/cache/', # var/cache/apt/
159 #'Dir::Cache::archives' : 'archives/',
160 'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
161 'Dir::Cache::pkgcache' : 'pkgcache.bin',
162 'Dir::Etc' : 'apt/etc/', # etc/apt/
163 'Dir::Etc::sourcelist' : 'sources.list',
164 'Dir::Etc::vendorlist' : 'vendors.list',
165 'Dir::Etc::vendorparts' : 'vendors.list.d',
166 #'Dir::Etc::main' : 'apt.conf',
167 #'Dir::Etc::parts' : 'apt.conf.d',
168 #'Dir::Etc::preferences' : 'preferences',
170 #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
171 'Dir::Bin::dpkg' : '/usr/bin/dpkg',
173 #'DPkg::Pre-Install-Pkgs' : '',
175 #'DPkg::Tools::Options' : '',
176 #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
177 #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
178 #'DPkg::Post-Invoke' : '',
180 essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
182 essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)
184 def __init__(self, cache_dir, unload_delay):
185 """Construct a new packages manager.
187 @param cache_dir: directory to use to store files for this mirror
189 self.cache_dir = cache_dir
190 self.unload_delay = unload_delay
191 self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)
193 # Create the necessary files and directories for apt
194 for dir in self.essential_dirs:
195 path = self.cache_dir.preauthChild(dir)
196 if not path.exists():
198 for file in self.essential_files:
199 path = self.cache_dir.preauthChild(file)
200 if not path.exists():
203 self.apt_config['Dir'] = self.cache_dir.path
204 self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path
205 self.packages = PackageFileList(cache_dir)
208 self.unload_later = None
def addRelease(self, cache_path, file_path):
    """Add a Release file's info to the list of index files.

    Dirty hack until python-apt supports apt-pkg/indexrecords.h

    Every file listed under the MD5Sum, SHA1 and SHA256 fields is recorded
    in C{self.indexrecords[cache_path]}, keyed by the listed file name and
    then by the upper-cased hash type, with a C{(hash, size)} tuple as value.

    @param cache_path: the key under which this Release file's records
        are stored in C{self.indexrecords}
    @param file_path: an object whose C{open('r')} returns the Release
        file to parse
    """
    self.indexrecords[cache_path] = {}

    f = file_path.open('r')
    try:
        # Use python-debian routines to parse the file for hashes
        rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
        for hash_type in rel:
            for entry in rel[hash_type]:
                self.indexrecords[cache_path].setdefault(entry['name'], {})[hash_type.upper()] = (entry[hash_type], entry['size'])
    finally:
        # Release the file handle even if parsing fails part-way through.
        f.close()
def file_updated(self, cache_path, file_path):
    """A file in the mirror has changed or been added.

    If this affects us, unload our apt database.
    @see: L{PackageFileList.update_file}

    @param cache_path: the location of the file within the mirror
    @param file_path: the location of the file in the file system
    """
    # update_file() reports whether the file was registered as a tracked
    # index file; only then is the apt database unloaded.
    if self.packages.update_file(cache_path, file_path):
        self.unload()
242 """Make sure the package cache is initialized and loaded."""
243 # Reset the pending unload call
244 if self.unload_later and self.unload_later.active():
245 self.unload_later.reset(self.unload_delay)
247 self.unload_later = reactor.callLater(self.unload_delay, self.unload)
249 # Make sure it's not already being loaded
250 if self.loading is None:
251 log.msg('Loading the packages cache')
252 self.loading = threads.deferToThread(self._load)
253 self.loading.addCallback(self.doneLoading)
256 def doneLoading(self, loadResult):
257 """Cache is loaded."""
259 # Must pass on the result for the next callback
263 """Regenerates the fake configuration and loads the packages caches."""
264 if self.loaded: return True
266 # Modify the default configuration to create the fake one.
268 self.cache_dir.preauthChild(self.apt_config['Dir::State']
269 ).preauthChild(self.apt_config['Dir::State::Lists']).remove()
270 self.cache_dir.preauthChild(self.apt_config['Dir::State']
271 ).preauthChild(self.apt_config['Dir::State::Lists']
272 ).child('partial').makedirs()
273 sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
274 ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
275 sources = sources_file.open('w')
277 deb_src_added = False
278 self.packages.check_files()
279 self.indexrecords = {}
281 # Create an entry in sources.list for each needed index file
282 for f in self.packages:
283 # we should probably clear old entries from self.packages and
284 # take into account the recorded mtime as optimization
285 file = self.packages[f]
286 if f.split('/')[-1] == "Release":
287 self.addRelease(f, file)
288 fake_uri='http://apt-p2p'+f
289 fake_dirname = '/'.join(fake_uri.split('/')[:-1])
290 if f.endswith('Sources'):
292 source_line='deb-src '+fake_dirname+'/ /'
294 source_line='deb '+fake_dirname+'/ /'
295 listpath = self.cache_dir.preauthChild(self.apt_config['Dir::State']
296 ).preauthChild(self.apt_config['Dir::State::Lists']
297 ).child(apt_pkg.URItoFileName(fake_uri))
298 sources.write(source_line+'\n')
299 log.msg("Sources line: " + source_line)
300 sources_count = sources_count + 1
302 if listpath.exists():
303 #we should empty the directory instead
305 os.symlink(file.path, listpath.path)
308 if sources_count == 0:
309 log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
312 log.msg("Loading Packages database for "+self.cache_dir.path)
313 for key, value in self.apt_config.items():
314 apt_pkg.Config[key] = value
316 self.cache = apt_pkg.GetCache(OpProgress())
317 self.records = apt_pkg.GetPkgRecords(self.cache)
319 self.srcrecords = apt_pkg.GetPkgSrcRecords()
321 self.srcrecords = None
327 """Tries to make the packages server quit."""
328 if self.unload_later and self.unload_later.active():
329 self.unload_later.cancel()
330 self.unload_later = None
332 log.msg('Unloading the packages cache')
333 # This should save memory
337 del self.indexrecords
341 """Cleanup and close any loaded caches."""
343 if self.unload_later and self.unload_later.active():
344 self.unload_later.cancel()
345 self.packages.close()
347 def findHash(self, path):
348 """Find the hash for a given path in this mirror.
350 @type path: C{string}
351 @param path: the path within the mirror of the file to lookup
352 @rtype: L{twisted.internet.defer.Deferred}
353 @return: a deferred so it can make sure the cache is loaded first
357 deferLoad = self.load()
358 deferLoad.addCallback(self._findHash, path, d)
359 deferLoad.addErrback(self._findHash_error, path, d)
363 def _findHash_error(self, failure, path, d):
364 """An error occurred, return an empty hash."""
365 log.msg('An error occurred while looking up a hash for: %s' % path)
367 d.callback(HashObject())
370 def _findHash(self, loadResult, path, d):
371 """Search the records for the hash of a path.
373 @type loadResult: C{boolean}
374 @param loadResult: whether apt's cache was successfully loaded
375 @type path: C{string}
376 @param path: the path within the mirror of the file to lookup
377 @type d: L{twisted.internet.defer.Deferred}
378 @param d: the deferred to callback with the result
381 d.callback(HashObject())
386 # First look for the path in the cache of index files
387 for release in self.indexrecords:
388 if path.startswith(release[:-7]):
389 for indexFile in self.indexrecords[release]:
390 if release[:-7] + indexFile == path:
391 h.setFromIndexRecord(self.indexrecords[release][indexFile])
395 package = path.split('/')[-1].split('_')[0]
397 # Check the binary packages
399 for version in self.cache[package].VersionList:
401 for verFile in version.FileList:
402 if self.records.Lookup(verFile):
403 if '/' + self.records.FileName == path:
404 h.setFromPkgRecord(self.records, size)
410 # Check the source packages' files
412 self.srcrecords.Restart()
413 if self.srcrecords.Lookup(package):
414 for f in self.srcrecords.Files:
415 if path == '/' + f[2]:
416 h.setFromSrcRecord(f)
422 # Have to pass the returned loadResult on in case other calls to this function are pending.
425 class TestAptPackages(unittest.TestCase):
426 """Unit tests for the AptPackages cache."""
436 """Initializes the cache with files found in the traditional apt location."""
437 self.client = AptPackages(FilePath('/tmp/.apt-p2p'), 300)
439 # Find the largest index files that are for 'main'
440 self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
441 self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
443 # Find the Release file corresponding to the found Packages file
444 for f in os.walk('/var/lib/apt/lists').next()[2]:
445 if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
449 # Add all the found files to the PackageFileList
450 self.client.file_updated(self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/'),
451 FilePath('/var/lib/apt/lists/' + self.releaseFile))
452 self.client.file_updated(self.packagesFile[self.packagesFile.find('_dists_'):].replace('_','/'),
453 FilePath('/var/lib/apt/lists/' + self.packagesFile))
454 self.client.file_updated(self.sourcesFile[self.sourcesFile.find('_dists_'):].replace('_','/'),
455 FilePath('/var/lib/apt/lists/' + self.sourcesFile))
457 def test_pkg_hash(self):
458 """Tests loading the binary package records cache."""
461 self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])
463 pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
464 '/var/lib/apt/lists/' + self.packagesFile +
465 ' | grep -E "^SHA1:" | head -n 1' +
466 ' | cut -d\ -f 2').read().rstrip('\n')
468 self.failUnless(self.client.records.SHA1Hash == pkg_hash,
469 "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))
471 def test_src_hash(self):
472 """Tests loading the source package records cache."""
475 self.client.srcrecords.Lookup('dpkg')
477 src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
478 '/var/lib/apt/lists/' + self.sourcesFile +
479 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
480 ' | cut -d\ -f 2').read().split('\n')[:-1]
482 for f in self.client.srcrecords.Files:
483 self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))
485 def test_index_hash(self):
486 """Tests loading the cache of index file information."""
489 indexhash = self.client.indexrecords[self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/')]['main/binary-i386/Packages.bz2']['SHA1'][0]
491 idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
492 '/var/lib/apt/lists/' + self.releaseFile +
493 ' | grep -E " main/binary-i386/Packages.bz2$"'
494 ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
496 self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))
def verifyHash(self, found_hash, path, true_hash):
    """Fail the test unless the found hash matches the expected one.

    @param found_hash: the lookup result; its C{hexexpected()} value is
        compared against C{true_hash}
    @param path: the path that was looked up, used only in the message
    @param true_hash: the expected hash value
    """
    found_hex = found_hash.hexexpected()
    message = "%s hashes don't match: %s != %s" % (path, found_hex, true_hash)
    self.failUnless(found_hex == true_hash, message)
502 def test_findIndexHash(self):
503 """Tests finding the hash of a single index file."""
504 lastDefer = defer.Deferred()
506 idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
507 '/var/lib/apt/lists/' + self.releaseFile +
508 ' | grep -E " main/binary-i386/Packages.bz2$"'
509 ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
510 idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
512 d = self.client.findHash(idx_path)
513 d.addCallback(self.verifyHash, idx_path, idx_hash)
515 d.addBoth(lastDefer.callback)
518 def test_findPkgHash(self):
519 """Tests finding the hash of a single binary package."""
520 lastDefer = defer.Deferred()
522 pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
523 '/var/lib/apt/lists/' + self.packagesFile +
524 ' | grep -E "^SHA1:" | head -n 1' +
525 ' | cut -d\ -f 2').read().rstrip('\n')
526 pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
527 '/var/lib/apt/lists/' + self.packagesFile +
528 ' | grep -E "^Filename:" | head -n 1' +
529 ' | cut -d\ -f 2').read().rstrip('\n')
531 d = self.client.findHash(pkg_path)
532 d.addCallback(self.verifyHash, pkg_path, pkg_hash)
534 d.addBoth(lastDefer.callback)
537 def test_findSrcHash(self):
538 """Tests finding the hash of a single source package."""
539 lastDefer = defer.Deferred()
541 src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
542 '/var/lib/apt/lists/' + self.sourcesFile +
543 ' | grep -E "^Directory:" | head -n 1' +
544 ' | cut -d\ -f 2').read().rstrip('\n')
545 src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
546 '/var/lib/apt/lists/' + self.sourcesFile +
547 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
548 ' | cut -d\ -f 2').read().split('\n')[:-1]
549 src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
550 '/var/lib/apt/lists/' + self.sourcesFile +
551 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
552 ' | cut -d\ -f 4').read().split('\n')[:-1]
554 i = choice(range(len(src_hashes)))
555 d = self.client.findHash(src_dir + '/' + src_paths[i])
556 d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])
558 d.addBoth(lastDefer.callback)
561 def test_multipleFindHash(self):
562 """Tests finding the hash of an index file, binary package, source package, and another index file."""
563 lastDefer = defer.Deferred()
565 # Lookup a Packages.bz2 file
566 idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
567 '/var/lib/apt/lists/' + self.releaseFile +
568 ' | grep -E " main/binary-i386/Packages.bz2$"'
569 ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
570 idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
572 d = self.client.findHash(idx_path)
573 d.addCallback(self.verifyHash, idx_path, idx_hash)
575 # Lookup the binary 'dpkg' package
576 pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
577 '/var/lib/apt/lists/' + self.packagesFile +
578 ' | grep -E "^SHA1:" | head -n 1' +
579 ' | cut -d\ -f 2').read().rstrip('\n')
580 pkg_path = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
581 '/var/lib/apt/lists/' + self.packagesFile +
582 ' | grep -E "^Filename:" | head -n 1' +
583 ' | cut -d\ -f 2').read().rstrip('\n')
585 d = self.client.findHash(pkg_path)
586 d.addCallback(self.verifyHash, pkg_path, pkg_hash)
588 # Lookup the source 'dpkg' package
589 src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
590 '/var/lib/apt/lists/' + self.sourcesFile +
591 ' | grep -E "^Directory:" | head -n 1' +
592 ' | cut -d\ -f 2').read().rstrip('\n')
593 src_hashes = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
594 '/var/lib/apt/lists/' + self.sourcesFile +
595 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
596 ' | cut -d\ -f 2').read().split('\n')[:-1]
597 src_paths = os.popen('grep -A 20 -E "^Package: dpkg$" ' +
598 '/var/lib/apt/lists/' + self.sourcesFile +
599 ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
600 ' | cut -d\ -f 4').read().split('\n')[:-1]
602 for i in range(len(src_hashes)):
603 d = self.client.findHash(src_dir + '/' + src_paths[i])
604 d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])
606 # Lookup a Sources.bz2 file
607 idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
608 '/var/lib/apt/lists/' + self.releaseFile +
609 ' | grep -E " main/source/Sources.bz2$"'
610 ' | head -n 1 | cut -d\ -f 2').read().rstrip('\n')
611 idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/source/Sources.bz2'
613 d = self.client.findHash(idx_path)
614 d.addCallback(self.verifyHash, idx_path, idx_hash)
616 d.addBoth(lastDefer.callback)
620 for p in self.pending_calls:
623 self.pending_calls = []
624 self.client.cleanup()