2 # Copyright (C) 2002 Manuel Estrada Sainz <ranty@debian.org>
3 # Copyright (C) 2008 Cameron Dale <camrdale@gmail.com>
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of version 2.1 of the GNU General Public
7 # License as published by the Free Software Foundation.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # General Public License for more details.
14 # You should have received a copy of the GNU General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Manage a mirror's index files.

@type TRACKED_FILES: C{list} of C{string}
@var TRACKED_FILES: the file names of files that contain index information
"""
24 # Disable the FutureWarning from the apt module
26 warnings.simplefilter("ignore", FutureWarning)
29 from random import choice
30 from shutil import rmtree
31 from copy import deepcopy
32 from UserDict import DictMixin
34 from twisted.internet import threads, defer, reactor
35 from twisted.python import log
36 from twisted.python.filepath import FilePath
37 from twisted.trial import unittest
39 import apt_pkg, apt_inst
40 from apt import OpProgress
41 from debian_bundle import deb822
43 from apt_p2p_conf import config
44 from Hash import HashObject
# Base names of the index files that are tracked for a mirror.  They are kept
# lower-case because PackageFileList.update_file() lower-cases the file name
# before testing membership.
TRACKED_FILES = ['release', 'sources', 'packages']
class PackageFileList(DictMixin):
    """Manages a list of index files belonging to a mirror.

    The list is stored in a shelve database ('packages.db') inside the
    cache directory, keyed by the file's path within the mirror.

    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type packages: C{shelve dictionary}
    @ivar packages: the files tracked for this mirror, or None while the
        database is closed
    """

    # NOTE(review): short statements (the 'def open'/'def close' lines, the
    # returns, the loop header in check_files) were absent from the reviewed
    # listing and have been restored from context -- confirm against upstream.

    def __init__(self, cache_dir):
        """Initialize the list by opening the dictionary.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: the directory to use for storing all files,
            created if it does not exist yet
        """
        self.cache_dir = cache_dir
        self.cache_dir.restat(False)
        if not self.cache_dir.exists():
            self.cache_dir.makedirs()
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files for this mirror."""
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir.child('packages.db').path)

    def close(self):
        """Close the persistent dictionary.

        The reference is dropped afterwards so that a second close() is a
        no-op and a later open() really reopens the database (open() only
        acts when C{packages} is None).
        """
        if self.packages is not None:
            self.packages.close()
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @type cache_path: C{string}
        @param cache_path: the location of the file within the mirror
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: The location of the file in the file system
        @rtype: C{boolean}
        @return: whether the file is an index file
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() in TRACKED_FILES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to remove any that don't exist."""
        # Iterate over a snapshot of the keys, entries are deleted in the loop.
        for f in list(self.packages.keys()):
            # Fetch the stored path once so that restat() and exists() act on
            # the same object.
            tracked = self.packages[f]
            tracked.restat(False)
            if not tracked.exists():
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    #{ Dictionary interface details
    def __getitem__(self, key): return self.packages[key]
    def __setitem__(self, key, item): self.packages[key] = item
    def __delitem__(self, key): del self.packages[key]
    def keys(self): return self.packages.keys()
class AptPackages:
    """Answers queries about packages available from a mirror.

    Uses the python-apt tools to parse and provide information about the
    files that are available on a single mirror.

    @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt
    @ivar essential_dirs: directories that must be created for apt to work
    @ivar essential_files: files that must be created for apt to work
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @ivar apt_config: the configuration parameters to use for apt
    @type packages: L{PackageFileList}
    @ivar packages: the persistent storage of tracked apt index files
    @type loaded: C{boolean}
    @ivar loaded: whether the apt cache is currently loaded
    @type loading: L{twisted.internet.defer.Deferred}
    @ivar loading: if the cache is currently being loaded, this will be
        called when it is loaded, otherwise it is None
    @type unload_later: L{twisted.internet.interfaces.IDelayedCall}
    @ivar unload_later: the delayed call to unload the apt cache
    @type indexrecords: C{dictionary}
    @ivar indexrecords: the hashes of index files for the mirror, keys are
        mirror directories, values are dictionaries with keys the path to the
        index file in the mirror directory and values are dictionaries with
        keys the hash type and values the hash
    @type cache: C{apt_pkg.GetCache()}
    @ivar cache: the apt cache of the mirror
    @type records: C{apt_pkg.GetPkgRecords()}
    @ivar records: the apt package records for all binary packages in a mirror
    @type srcrecords: C{apt_pkg.GetPkgSrcRecords}
    @ivar srcrecords: the apt package records for all source packages in a mirror
    """

    # NOTE(review): the class header and a number of short statements were
    # absent from the reviewed listing and have been restored from context;
    # places where the restoration is not forced by the surrounding code carry
    # their own NOTE(review) -- confirm against upstream.

    DEFAULT_APT_CONFIG = {
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        # NOTE(review): restored entry; __init__ always overrides 'Dir' with
        # the mirror's cache directory.
        'Dir' : '.', # /
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        # NOTE(review): '.apt/cache/' does not match the 'apt/cache' entry of
        # essential_dirs (leading dot) -- possibly a typo, left unchanged
        # because fixing it would change where apt writes its cache.
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    # NOTE(review): the last entry was cut off in the reviewed listing;
    # 'apt/lists/partial' is the directory _load() recreates.
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: directory to use to store files for this mirror
        """
        self.cache_dir = cache_dir
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        # Create the necessary files and directories for apt
        for dirname in self.essential_dirs:
            path = self.cache_dir.preauthChild(dirname)
            if not path.exists():
                path.makedirs()
        for filename in self.essential_files:
            path = self.cache_dir.preauthChild(filename)
            if not path.exists():
                path.touch()

        # Point apt at this mirror's private directory tree
        self.apt_config['Dir'] = self.cache_dir.path
        state_dir = self.cache_dir.preauthChild(self.apt_config['Dir::State'])
        self.apt_config['Dir::State::status'] = state_dir.preauthChild(
            self.apt_config['Dir::State::status']).path
        self.packages = PackageFileList(cache_dir)
        self.loaded = False
        self.loading = None
        self.unload_later = None

    # NOTE(review): restored from a gap in the reviewed listing -- confirm
    # that the upstream class really defines a finalizer.
    def __del__(self):
        self.cleanup()

    def addRelease(self, cache_path, file_path):
        """Add a Release file's info to the list of index files.

        Dirty hack until python-apt supports apt-pkg/indexrecords.h

        @type cache_path: C{string}
        @param cache_path: the location of the Release file within the mirror
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: the location of the Release file in the file system
        """
        records = self.indexrecords[cache_path] = {}

        f = file_path.open('r')
        try:
            # Use python-debian routines to parse the file for hashes
            rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
            for hash_type in rel:
                for entry in rel[hash_type]:
                    records.setdefault(entry['name'], {})[hash_type.upper()] = (entry[hash_type], entry['size'])
        finally:
            # Don't leak the file handle if parsing fails
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the mirror has changed or been added.

        If this affects us, unload our apt database.
        @see: L{PackageFileList.update_file}
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package cache is initialized and loaded.

        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred that fires with the result of the load (True if
            the cache is loaded)
        """
        # Reset the pending unload call
        unload_delay = config.gettime('DEFAULT', 'UNLOAD_PACKAGES_CACHE')
        if self.unload_later and self.unload_later.active():
            self.unload_later.reset(unload_delay)
        else:
            self.unload_later = reactor.callLater(unload_delay, self.unload)

        # Check if it's already loaded
        if self.loaded:
            return defer.succeed(True)

        # Make sure it's not already being loaded
        if self.loading is None:
            log.msg('Loading the packages cache')
            self.loading = threads.deferToThread(self._load)
            # addBoth, not addCallback: a failed load must also clear
            # self.loading, otherwise every later call would be handed the
            # same already-failed deferred and the load would never be retried.
            self.loading.addBoth(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """The load attempt has finished (successfully or not)."""
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and loads the packages caches.

        @rtype: C{boolean}
        @return: True if the cache is loaded, False if there are no index
            files to load it from
        """
        if self.loaded: return True

        # Modify the default configuration to create the fake one.
        # NOTE(review): restored call -- confirm against upstream.
        apt_pkg.InitSystem()
        lists_dir = self.cache_dir.preauthChild(self.apt_config['Dir::State']
                                 ).preauthChild(self.apt_config['Dir::State::Lists'])
        lists_dir.remove()
        lists_dir.child('partial').makedirs()
        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
                                    ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
        sources = sources_file.open('w')
        sources_count = 0
        deb_src_added = False
        try:
            self.packages.check_files()
            self.indexrecords = {}

            # Create an entry in sources.list for each needed index file
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                index_file = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, index_file)
                fake_uri='http://apt-p2p'+f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line='deb-src '+fake_dirname+'/ /'
                else:
                    source_line='deb '+fake_dirname+'/ /'
                listpath = lists_dir.child(apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line+'\n')
                log.msg("Sources line: " + source_line)
                sources_count += 1

                if listpath.exists():
                    #we should empty the directory instead
                    listpath.remove()
                # apt reads the index through a link named after the fake URI
                os.symlink(index_file.path, listpath.path)
        finally:
            # Close sources.list even if one of the index files is bad
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
            return False

        log.msg("Loading Packages database for "+self.cache_dir.path)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = True
        return True

    def unload(self):
        """Tries to make the packages server quit."""
        if self.unload_later and self.unload_later.active():
            self.unload_later.cancel()
        self.unload_later = None
        if self.loaded:
            log.msg('Unloading the packages cache')
            # This should save memory
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = False

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        # unload() also cancels the pending delayed unload call
        self.unload()
        self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred so it can make sure the cache is loaded first
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred, return an empty hash."""
        log.msg('An error occurred while looking up a hash for: %s' % path)
        log.err(failure)
        d.callback(HashObject())
        # Keep the failure on the load deferred's chain for any handlers
        # added after this one.
        return failure

    def _findHash(self, loadResult, path, d):
        """Search the records for the hash of a path.

        @type loadResult: C{boolean}
        @param loadResult: whether apt's cache was successfully loaded
        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @type d: L{twisted.internet.defer.Deferred}
        @param d: the deferred to callback with the result
        @return: C{loadResult} unchanged
        """
        if not loadResult:
            d.callback(HashObject())
            return loadResult

        h = HashObject()

        # First look for the path in the cache of index files
        for release in self.indexrecords:
            # release[:-7] strips the trailing 'Release' to get the directory
            release_dir = release[:-7]
            if path.startswith(release_dir):
                for indexFile in self.indexrecords[release]:
                    if release_dir + indexFile == path:
                        h.setFromIndexRecord(self.indexrecords[release][indexFile])
                        d.callback(h)
                        return loadResult

        # The package name is the file name up to the first underscore
        package = path.split('/')[-1].split('_')[0]

        # Check the binary packages
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            h.setFromPkgRecord(self.records, size)
                            d.callback(h)
                            return loadResult
        except KeyError:
            # Not a known binary package, fall through to the sources
            pass

        # Check the source packages' files
        if self.srcrecords:
            self.srcrecords.Restart()
            if self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        h.setFromSrcRecord(f)
                        d.callback(h)
                        return loadResult

        # Nothing matched: report the still empty hash
        d.callback(h)

        # Have to pass the returned loadResult on in case other calls to this function are pending.
        return loadResult
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests read the machine's real apt index files from
    /var/lib/apt/lists/ and compare what AptPackages reports with values
    extracted from those files by shell pipelines.
    """

    # NOTE(review): the class-level defaults, the 'def setUp'/'def tearDown'
    # lines and the '_load()'/'return lastDefer' statements were absent from
    # the reviewed listing and have been restored from context -- confirm
    # against upstream (upstream may also set a trial 'timeout' here).
    pending_calls = []
    client = None
    packagesFile = ''
    sourcesFile = ''
    releaseFile = ''

    #{ Helpers
    def _shell(self, command):
        """Run a shell pipeline and return everything it wrote to stdout."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            pipe.close()

    def _mirror_path(self, list_file):
        """Translate an apt lists file name into its path within the mirror."""
        return list_file[list_file.find('_dists_'):].replace('_','/')

    def _index_path(self, index_name):
        """Get the mirror path of an index file listed in the Release file."""
        # [:-7] strips the trailing 'Release' to get the directory
        return '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + index_name

    def _release_sha1(self, index_name):
        """Extract the SHA1 of an index file from the Release file."""
        # cut -d" " selects the space as the single delimiter character
        return self._shell('grep -A 3000 -E "^SHA1:" ' +
                           '/var/lib/apt/lists/' + self.releaseFile +
                           ' | grep -E " ' + index_name + '$"'
                           ' | head -n 1 | cut -d" " -f 2').rstrip('\n')

    def _dpkg_field(self, list_file, field):
        """Extract the first C{field} value following 'Package: dpkg'."""
        return self._shell('grep -A 30 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + list_file +
                           ' | grep -E "^' + field + ':" | head -n 1' +
                           ' | cut -d" " -f 2').rstrip('\n')

    def _dpkg_src_files(self, column):
        """Extract one column of the dpkg source package's 'Files:' lines.

        Column 2 is the hash and column 4 the file name (the lines start
        with a space, so column 1 is empty).
        """
        return self._shell('grep -A 20 -E "^Package: dpkg$" ' +
                           '/var/lib/apt/lists/' + self.sourcesFile +
                           ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                           (' | cut -d" " -f %d' % column)).split('\n')[:-1]
    #}

    def setUp(self):
        """Initializes the cache with files found in the traditional apt location."""
        self.client = AptPackages(FilePath('/tmp/.apt-p2p'))

        # Find the largest index files that are for 'main'
        self.packagesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').rstrip('\n')
        self.sourcesFile = self._shell('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').rstrip('\n')

        # Find the Release file corresponding to the found Packages file
        for f in next(os.walk('/var/lib/apt/lists'))[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        # Add all the found files to the PackageFileList
        for list_file in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._mirror_path(list_file),
                                     FilePath('/var/lib/apt/lists/' + list_file))

    def test_pkg_hash(self):
        """Tests loading the binary package records cache."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._dpkg_field(self.packagesFile, 'SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """Tests loading the source package records cache."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._dpkg_src_files(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """Tests loading the cache of index file information."""
        self.client._load()

        indexhash = self.client.indexrecords[self._mirror_path(self.releaseFile)]['main/binary-i386/Packages.bz2']['SHA1'][0]

        idx_hash = self._release_sha1('main/binary-i386/Packages.bz2')

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Deferred callback checking a found hash against the expected one."""
        self.failUnless(found_hash.hexexpected() == true_hash,
                        "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findIndexHash(self):
        """Tests finding the hash of a single index file."""
        lastDefer = defer.Deferred()

        idx_hash = self._release_sha1('main/binary-i386/Packages.bz2')
        idx_path = self._index_path('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_findPkgHash(self):
        """Tests finding the hash of a single binary package."""
        lastDefer = defer.Deferred()

        pkg_hash = self._dpkg_field(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkg_field(self.packagesFile, 'Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_findSrcHash(self):
        """Tests finding the hash of a single source package."""
        lastDefer = defer.Deferred()

        src_dir = '/' + self._dpkg_field(self.sourcesFile, 'Directory')
        src_hashes = self._dpkg_src_files(2)
        src_paths = self._dpkg_src_files(4)

        # Check one randomly chosen file of the source package
        i = choice(range(len(src_hashes)))
        src_path = src_dir + '/' + src_paths[i]
        d = self.client.findHash(src_path)
        d.addCallback(self.verifyHash, src_path, src_hashes[i])

        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_multipleFindHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        lastDefer = defer.Deferred()

        # Lookup a Packages.bz2 file
        idx_hash = self._release_sha1('main/binary-i386/Packages.bz2')
        idx_path = self._index_path('main/binary-i386/Packages.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Lookup the binary 'dpkg' package
        pkg_hash = self._dpkg_field(self.packagesFile, 'SHA1')
        pkg_path = '/' + self._dpkg_field(self.packagesFile, 'Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        # Lookup the source 'dpkg' package
        src_dir = '/' + self._dpkg_field(self.sourcesFile, 'Directory')
        src_hashes = self._dpkg_src_files(2)
        src_paths = self._dpkg_src_files(4)

        for i in range(len(src_hashes)):
            src_path = src_dir + '/' + src_paths[i]
            d = self.client.findHash(src_path)
            d.addCallback(self.verifyHash, src_path, src_hashes[i])

        # Lookup a Sources.bz2 file
        idx_hash = self._release_sha1('main/source/Sources.bz2')
        idx_path = self._index_path('main/source/Sources.bz2')

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Only the last lookup is chained to the returned deferred
        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        """Cancel any outstanding delayed calls and close the cache."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()
        self.client = None