#
# Copyright (C) 2002 Manuel Estrada Sainz <ranty@debian.org>
# Copyright (C) 2008 Cameron Dale <camrdale@gmail.com>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU General Public
# License as published by the Free Software Foundation.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Manage a mirror's index files.

@type TRACKED_FILES: C{list} of C{string}
@var TRACKED_FILES: the file names of files that contain index information
"""
# Disable the FutureWarning from the apt module.  The filter has to be
# installed before the apt modules below are imported.
import warnings
warnings.simplefilter("ignore", FutureWarning)

import os
import shelve
from random import choice
from shutil import rmtree
from copy import deepcopy
from UserDict import DictMixin

from twisted.internet import threads, defer, reactor
from twisted.python import log
from twisted.python.filepath import FilePath
from twisted.trial import unittest

import apt_pkg
import apt_inst
from apt import OpProgress
from debian_bundle import deb822

from apt_p2p_conf import config
from Hash import HashObject
# Lower-case base names of the mirror files that carry index information.
# A file's base name is lower-cased before it is compared with this list.
TRACKED_FILES = ['release', 'sources', 'packages']
class PackageFileList(DictMixin):
    """Manages a list of index files belonging to a mirror.

    The list is kept in a shelve database named C{packages.db} inside the
    cache directory.  Keys are the locations of the files within the mirror,
    values are the objects passed to L{update_file} as C{file_path}.

    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type packages: C{shelve dictionary}
    @ivar packages: the files tracked for this mirror, or None while closed
    """

    def __init__(self, cache_dir):
        """Initialize the list by opening the dictionary.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: the directory to store the database in, it is
            created if it does not exist yet
        """
        self.cache_dir = cache_dir
        # Refresh the stat information so exists() reflects the disk
        self.cache_dir.restat(False)
        if not self.cache_dir.exists():
            self.cache_dir.makedirs()
        # Has to exist before open(), which only opens when it is None
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files for this mirror.

        Does nothing if the dictionary is already open.
        """
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir.child('packages.db').path)

    def close(self):
        """Close the persistent dictionary.

        Safe to call more than once; L{open} can be used again afterwards.
        """
        if self.packages is not None:
            self.packages.close()
            # Forget the closed shelf so a later open() really re-opens it
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @type cache_path: C{string}
        @param cache_path: the location of the file within the mirror
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: The location of the file in the file system
        @rtype: C{boolean}
        @return: whether the file is an index file
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() in TRACKED_FILES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to remove any that don't exist."""
        # Iterate over a copy of the keys, entries are deleted in the loop
        for f in list(self.packages.keys()):
            # Fetch the entry once so restat() and exists() act on the
            # same object (every shelve lookup returns a fresh copy)
            tracked = self.packages[f]
            tracked.restat(False)
            if not tracked.exists():
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    #{ Dictionary interface details
    def __getitem__(self, key):
        """Return the tracked file stored under C{key}."""
        return self.packages[key]

    def __setitem__(self, key, item):
        """Store C{item} as the tracked file for C{key}."""
        self.packages[key] = item

    def __delitem__(self, key):
        """Stop tracking the file stored under C{key}."""
        del self.packages[key]

    def keys(self):
        """Return the mirror locations of all tracked files."""
        return self.packages.keys()
class AptPackages:
    """Answers queries about packages available from a mirror.

    Uses the python-apt tools to parse and provide information about the
    files that are available on a single mirror.

    @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt
    @ivar essential_dirs: directories that must be created for apt to work
    @ivar essential_files: files that must be created for apt to work
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @ivar apt_config: the configuration parameters to use for apt
    @type packages: L{PackageFileList}
    @ivar packages: the persistent storage of tracked apt index files
    @type loaded: C{boolean}
    @ivar loaded: whether the apt cache is currently loaded
    @type loading: L{twisted.internet.defer.Deferred}
    @ivar loading: if the cache is currently being loaded, this will be
        called when it is loaded, otherwise it is None
    @type loading_unload: C{boolean}
    @ivar loading_unload: whether there is an unload pending on the current load
    @type unload_later: L{twisted.internet.interfaces.IDelayedCall}
    @ivar unload_later: the delayed call to unload the apt cache
    @type indexrecords: C{dictionary}
    @ivar indexrecords: the hashes of index files for the mirror, keys are
        mirror directories, values are dictionaries with keys the path to the
        index file in the mirror directory and values are dictionaries with
        keys the hash type and values the hash
    @type cache: C{apt_pkg.GetCache()}
    @ivar cache: the apt cache of the mirror
    @type records: C{apt_pkg.GetPkgRecords()}
    @ivar records: the apt package records for all binary packages in a mirror
    @type srcrecords: C{apt_pkg.GetPkgSrcRecords}
    @ivar srcrecords: the apt package records for all source packages in a mirror
    """

    # NOTE(review): 'Dir::Cache' starts with a dot ('.apt/cache/') while
    # essential_dirs creates 'apt/cache' -- confirm this mismatch is intended.
    DEFAULT_APT_CONFIG = {
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }

    # NOTE(review): the last entry is the 'partial' directory that _load()
    # re-creates below the lists directory -- confirm nothing else is needed.
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: directory to use to store files for this mirror
        """
        self.cache_dir = cache_dir
        # Copy so that per-instance changes never touch the class defaults
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        # Create the necessary files and directories for apt
        for dirname in self.essential_dirs:
            path = self.cache_dir.preauthChild(dirname)
            if not path.exists():
                path.makedirs()
        for filename in self.essential_files:
            path = self.cache_dir.preauthChild(filename)
            if not path.exists():
                path.touch()

        self.apt_config['Dir'] = self.cache_dir.path
        # The status file is given to apt as an absolute path
        state_dir = self.cache_dir.preauthChild(self.apt_config['Dir::State'])
        self.apt_config['Dir::State::status'] = state_dir.preauthChild(
            self.apt_config['Dir::State::status']).path
        self.packages = PackageFileList(cache_dir)
        self.loaded = False
        self.loading = None
        self.loading_unload = False
        self.unload_later = None

    def addRelease(self, cache_path, file_path):
        """Add a Release file's info to the list of index files.

        Dirty hack until python-apt supports apt-pkg/indexrecords.h

        @type cache_path: C{string}
        @param cache_path: the location of the Release file within the mirror
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: the location of the Release file in the file system
        """
        records = {}
        self.indexrecords[cache_path] = records

        f = file_path.open('r')
        try:
            # Use python-debian routines to parse the file for hashes
            rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
            for hash_type in rel:
                for entry in rel[hash_type]:
                    # Stored as a (hash, size) pair under the upper-cased type
                    records.setdefault(entry['name'], {})[hash_type.upper()] = (entry[hash_type], entry['size'])
        finally:
            # Close the file even if parsing fails
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the mirror has changed or been added.

        If this affects us, unload our apt database.
        @see: L{PackageFileList.update_file}
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package cache is initialized and loaded.

        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that fires with the result of loading the cache
        """
        # Reset the pending unload call
        delay = config.gettime('DEFAULT', 'UNLOAD_PACKAGES_CACHE')
        if self.unload_later and self.unload_later.active():
            self.unload_later.reset(delay)
        else:
            self.unload_later = reactor.callLater(delay, self.unload)

        # Check if it's already loaded
        if self.loaded:
            return defer.succeed(True)

        # Make sure it's not already being loaded
        if self.loading is None:
            log.msg('Loading the packages cache')
            self.loading_unload = False
            self.loading = threads.deferToThread(self._load)
            # NOTE(review): doneLoading only runs on success, so a failed
            # load leaves self.loading set -- confirm whether that is intended.
            self.loading.addCallback(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """Cache is loaded.

        @param loadResult: the value returned by L{_load}
        @return: C{loadResult}, or the deferred of a new load if an unload
            was requested while the cache was being loaded
        """
        self.loading = None

        # Check for a pending unload
        if self.loading_unload:
            log.msg('Re-loading the packages cache')
            # Drop what was just loaded, otherwise _load() returns at once
            self.unload()
            self.loading_unload = False
            self.loading = threads.deferToThread(self._load)
            self.loading.addCallback(self.doneLoading)
            return self.loading

        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and loads the packages caches.

        Run in a thread by L{load} and L{doneLoading}.

        @rtype: C{boolean}
        @return: whether the cache is loaded, False if no index files are tracked
        """
        if self.loaded: return True

        # Modify the default configuration to create the fake one.
        # NOTE(review): no apt_pkg initialisation call is visible in this
        # module -- confirm apt is initialised before GetCache() is reached.
        state_dir = self.cache_dir.preauthChild(self.apt_config['Dir::State'])
        lists_dir = state_dir.preauthChild(self.apt_config['Dir::State::Lists'])
        # Start from an empty lists directory
        if lists_dir.exists():
            lists_dir.remove()
        lists_dir.child('partial').makedirs()
        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
                               ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])

        sources_count = 0
        deb_src_added = False
        self.packages.check_files()
        self.indexrecords = {}

        sources = sources_file.open('w')
        try:
            # Create an entry in sources.list for each needed index file
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                index_file = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, index_file)
                fake_uri = 'http://apt-p2p'+f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line = 'deb-src '+fake_dirname+'/ /'
                else:
                    source_line = 'deb '+fake_dirname+'/ /'
                listpath = lists_dir.child(apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line+'\n')
                log.msg("Sources line: " + source_line)
                sources_count = sources_count + 1

                if listpath.exists():
                    #we should empty the directory instead
                    listpath.remove()
                # Link the real index file in under the name of the fake URI
                os.symlink(index_file.path, listpath.path)
        finally:
            # Close sources.list even if an index file could not be processed
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
            return False

        log.msg("Loading Packages database for "+self.cache_dir.path)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        # Source records are only loaded when a Sources file was listed
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = True
        return True

    def unload(self):
        """Tries to make the packages server quit.

        Cancels the delayed unload call and drops the loaded caches.  If a
        load is in progress the unload is recorded in C{loading_unload}.
        """
        if self.unload_later and self.unload_later.active():
            self.unload_later.cancel()
        self.unload_later = None
        if self.loading:
            self.loading_unload = True
        if self.loaded:
            log.msg('Unloading the packages cache')
            # This should save memory
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = False

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        # unload() also cancels the delayed unload call
        self.unload()
        self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred so it can make sure the cache is loaded first
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred, return an empty hash.

        @type failure: L{twisted.python.failure.Failure}
        @param failure: the error that occurred
        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @type d: L{twisted.internet.defer.Deferred}
        @param d: the deferred to callback with the empty hash
        @return: C{failure}, passed on for any other pending lookups
        """
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        # Never fire the caller's deferred twice
        if not d.called:
            d.callback(HashObject())
        return failure

    def _findHash(self, loadResult, path, d):
        """Search the records for the hash of a path.

        @type loadResult: C{boolean}
        @param loadResult: whether apt's cache was successfully loaded
        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @type d: L{twisted.internet.defer.Deferred}
        @param d: the deferred to callback with the result
        @return: C{loadResult}, unchanged
        """
        if loadResult:
            found = self._lookupHash(path)
        else:
            # Nothing is loaded, so there is nothing to search
            found = HashObject()
        d.callback(found)

        # Have to pass the returned loadResult on in case other calls to this function are pending.
        return loadResult

    def _lookupHash(self, path):
        """Search the loaded index, binary and source records for a path.

        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @return: a L{Hash.HashObject}, left unset if the path is not found
        """
        h = HashObject()

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # Strip the trailing 'Release' to get the release's directory
            release_dir = release[:-7]
            if path.startswith(release_dir):
                indexFile = path[len(release_dir):]
                if indexFile in self.indexrecords[release]:
                    h.setFromIndexRecord(self.indexrecords[release][indexFile])
                    return h

        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            versions = self.cache[package].VersionList
        except KeyError:
            # Not a binary package known to this mirror
            versions = []
        for version in versions:
            # NOTE(review): size is assumed to come from the version
            # record -- confirm against setFromPkgRecord().
            size = version.Size
            for verFile in version.FileList:
                if self.records.Lookup(verFile):
                    if '/' + self.records.FileName == path:
                        h.setFromPkgRecord(self.records, size)
                        return h

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.Restart()
            if self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        h.setFromSrcRecord(f)
                        return h

        return h
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests read the index files of the machine they run on, found in
    the traditional apt location, and shell out to grep/cut to obtain the
    expected values.

    @ivar LISTS_DIR: the directory holding the machine's apt index files
    @ivar pending_calls: delayed calls to cancel when a test finishes
    @ivar client: the L{AptPackages} instance being tested
    @ivar packagesFile: file name of the Packages index used by the tests
    @ivar sourcesFile: file name of the Sources index used by the tests
    @ivar releaseFile: file name of the Release file matching packagesFile
    """

    LISTS_DIR = '/var/lib/apt/lists/'

    pending_calls = []
    client = None
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    def setUp(self):
        """Initializes the cache with files found in the traditional apt location."""
        self.client = AptPackages(FilePath('/tmp/.apt-p2p'))

        # Find the largest index files that are for 'main'
        self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
        self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')

        # Find the Release file corresponding to the found Packages file
        # (the default keeps a missing lists directory from raising here)
        for f in next(os.walk('/var/lib/apt/lists'), (None, [], []))[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        # Add all the found files to the PackageFileList
        for listName in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listName),
                                     FilePath(self.LISTS_DIR + listName))

    #{ Helpers for finding the expected values
    def _cachePath(self, listName):
        """Convert the file name of an apt index file to its path in the mirror."""
        return listName[listName.find('_dists_'):].replace('_','/')

    def _indexPath(self, indexName):
        """Get the path to request for an index file listed in the Release file."""
        return '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + indexName

    def _releaseSHA1(self, indexName):
        """Read the SHA1 hash of an index file from the Release file."""
        return os.popen('grep -A 3000 -E "^SHA1:" ' +
                        self.LISTS_DIR + self.releaseFile +
                        ' | grep -E " ' + indexName + '$"'
                        ' | head -n 1 | cut -d\\ -f 2').read().rstrip('\n')

    def _dpkgField(self, listName, field):
        """Read a field of the 'dpkg' package's entry in an index file."""
        return os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                        self.LISTS_DIR + listName +
                        ' | grep -E "^' + field + ':" | head -n 1' +
                        ' | cut -d\\ -f 2').read().rstrip('\n')

    def _dpkgSourceFiles(self, column):
        """Read one column of the Files list of the 'dpkg' source package.

        As used by the tests, column 2 holds the hashes and column 4 the
        file names.
        """
        return os.popen('grep -A 20 -E "^Package: dpkg$" ' +
                        self.LISTS_DIR + self.sourcesFile +
                        ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                        ' | cut -d\\ -f %d' % column).read().split('\n')[:-1]

    #{ Tests of the loaded caches
    def test_pkg_hash(self):
        """Tests loading the binary package records cache."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')

        self.assertEqual(self.client.records.SHA1Hash, pkg_hash,
                         "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """Tests loading the source package records cache."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._dpkgSourceFiles(2)

        for f in self.client.srcrecords.Files:
            self.assertTrue(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """Tests loading the cache of index file information."""
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._releaseSHA1('main/binary-i386/Packages.bz2')

        self.assertEqual(indexhash, idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    #{ Tests of finding hashes
    def verifyHash(self, found_hash, path, true_hash):
        """Check that the hash found for a path is the expected one."""
        self.assertEqual(found_hash.hexexpected(), true_hash,
                         "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findIndexHash(self):
        """Tests finding the hash of a single index file."""
        idx_hash = self._releaseSHA1('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        # Returned so that the test waits for the lookup to finish
        return d

    def test_findPkgHash(self):
        """Tests finding the hash of a single binary package."""
        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
        return d

    def test_findSrcHash(self):
        """Tests finding the hash of a single source package."""
        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_files = list(zip(self._dpkgSourceFiles(2), self._dpkgSourceFiles(4)))

        # Check a random one of the source package's files
        src_hash, src_name = choice(src_files)
        src_path = src_dir + '/' + src_name
        d = self.client.findHash(src_path)
        d.addCallback(self.verifyHash, src_path, src_hash)
        return d

    def test_multipleFindHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        lookups = []

        # Lookup a Packages.bz2 file
        idx_hash = self._releaseSHA1('main/binary-i386/Packages.bz2')
        idx_path = self._indexPath('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        # Lookup the binary 'dpkg' package
        pkg_hash = self._dpkgField(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkgField(self.packagesFile, 'Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
        lookups.append(d)

        # Lookup the source 'dpkg' package
        src_dir = '/' + self._dpkgField(self.sourcesFile, 'Directory')
        src_hashes = self._dpkgSourceFiles(2)
        src_paths = self._dpkgSourceFiles(4)

        for src_hash, src_name in zip(src_hashes, src_paths):
            src_path = src_dir + '/' + src_name
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hash)
            lookups.append(d)

        # Lookup a Sources.bz2 file
        idx_hash = self._releaseSHA1('main/source/Sources.bz2')
        idx_path = self._indexPath('main/source/Sources.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)
        lookups.append(d)

        # Wait for every lookup, so that a failure of any of them is reported
        return defer.gatherResults(lookups)

    def tearDown(self):
        """Cancel any pending delayed calls and close the cache."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None