2 # Copyright (C) 2002 Manuel Estrada Sainz <ranty@debian.org>
3 # Copyright (C) 2008 Cameron Dale <camrdale@gmail.com>
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of version 2.1 of the GNU General Public
7 # License as published by the Free Software Foundation.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # General Public License for more details.
14 # You should have received a copy of the GNU General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Manage a mirror's index files.

@type TRACKED_FILES: C{list} of C{string}
@var TRACKED_FILES: the lower-case file names of files that contain index
    information
"""
24 # Disable the FutureWarning from the apt module
26 warnings.simplefilter("ignore", FutureWarning)
29 from random import choice
30 from shutil import rmtree
31 from copy import deepcopy
32 from UserDict import DictMixin
34 from twisted.internet import threads, defer, reactor
35 from twisted.python import log
36 from twisted.python.filepath import FilePath
37 from twisted.trial import unittest
39 import apt_pkg, apt_inst
40 from apt import OpProgress
41 from debian_bundle import deb822
43 from apt_p2p_conf import config
44 from Hash import HashObject
# Base names of the index files to track. PackageFileList.update_file
# lower-cases the last path component before testing membership here, so
# the entries must stay lower-case.
TRACKED_FILES = ['release', 'sources', 'packages']
class PackageFileList(DictMixin):
    """Manages a list of index files belonging to a mirror.

    The list is kept in a shelve database ('packages.db') inside the cache
    directory; keys are paths within the mirror and values are the
    corresponding file system locations.

    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @type packages: C{shelve dictionary}
    @ivar packages: the files tracked for this mirror, or C{None} while the
        dictionary is closed
    """

    def __init__(self, cache_dir):
        """Initialize the list by opening the dictionary.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: the directory to use for storing all files
            (created if it does not exist yet)
        """
        self.cache_dir = cache_dir
        self.cache_dir.restat(False)
        if not self.cache_dir.exists():
            self.cache_dir.makedirs()
        # open() only creates the shelf while this attribute is None
        self.packages = None
        self.open()

    def open(self):
        """Open the persistent dictionary of files for this mirror."""
        if self.packages is None:
            self.packages = shelve.open(self.cache_dir.child('packages.db').path)

    def close(self):
        """Close the persistent dictionary.

        Safe to call more than once; a later L{open} reopens the shelf.
        """
        if self.packages is not None:
            self.packages.close()
            # Forget the closed shelf so that open() can create a new one
            self.packages = None

    def update_file(self, cache_path, file_path):
        """Check if an updated file needs to be tracked.

        Called from the mirror manager when files get updated so we can update our
        fake lists and sources.list.

        @type cache_path: C{string}
        @param cache_path: the location of the file within the mirror
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: The location of the file in the file system
        @rtype: C{boolean}
        @return: whether the file is an index file
        """
        filename = cache_path.split('/')[-1]
        if filename.lower() in TRACKED_FILES:
            log.msg("Registering package file: "+cache_path)
            self.packages[cache_path] = file_path
            return True
        return False

    def check_files(self):
        """Check all files in the database to remove any that don't exist."""
        # Work on a copy of the keys: entries are deleted while looping
        for f in list(self.packages.keys()):
            # Fetch the entry once so restat() and exists() act on the
            # same object
            entry = self.packages[f]
            entry.restat(False)
            if not entry.exists():
                log.msg("File in packages database has been deleted: "+f)
                del self.packages[f]

    #{ Dictionary interface details
    def __getitem__(self, key):
        return self.packages[key]

    def __setitem__(self, key, item):
        self.packages[key] = item

    def __delitem__(self, key):
        del self.packages[key]

    def keys(self):
        return self.packages.keys()
    #}
class AptPackages:
    """Answers queries about packages available from a mirror.

    Uses the python-apt tools to parse and provide information about the
    files that are available on a single mirror.

    @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt
    @ivar essential_dirs: directories that must be created for apt to work
    @ivar essential_files: files that must be created for apt to work
    @type cache_dir: L{twisted.python.filepath.FilePath}
    @ivar cache_dir: the directory to use for storing all files
    @ivar apt_config: the configuration parameters to use for apt
    @type packages: L{PackageFileList}
    @ivar packages: the persistent storage of tracked apt index files
    @type loaded: C{boolean}
    @ivar loaded: whether the apt cache is currently loaded
    @type loading: L{twisted.internet.defer.Deferred}
    @ivar loading: if the cache is currently being loaded, this will be
        called when it is loaded, otherwise it is None
    @type unload_later: L{twisted.internet.interfaces.IDelayedCall}
    @ivar unload_later: the delayed call to unload the apt cache
    @type indexrecords: C{dictionary}
    @ivar indexrecords: the hashes of index files for the mirror, keys are
        mirror directories, values are dictionaries with keys the path to the
        index file in the mirror directory and values are dictionaries with
        keys the hash type and values the hash
    @type cache: C{apt_pkg.GetCache()}
    @ivar cache: the apt cache of the mirror
    @type records: C{apt_pkg.GetPkgRecords()}
    @ivar records: the apt package records for all binary packages in a mirror
    @type srcrecords: C{apt_pkg.GetPkgSrcRecords}
    @ivar srcrecords: the apt package records for all source packages in a mirror
    """

    DEFAULT_APT_CONFIG = {
        #'APT::Architecture' : 'i386',  # Commented so the machine's config will set this
        #'APT::Default-Release' : 'unstable',
        'Dir::State' : 'apt/', # var/lib/apt/
        'Dir::State::Lists': 'lists/', # lists/
        #'Dir::State::cdroms' : 'cdroms.list',
        'Dir::State::userstatus' : 'status.user',
        'Dir::State::status': 'dpkg/status', # '/var/lib/dpkg/status'
        # NOTE(review): the leading '.' differs from the 'apt/cache' entry
        # in essential_dirs -- confirm which one is intended.
        'Dir::Cache' : '.apt/cache/', # var/cache/apt/
        #'Dir::Cache::archives' : 'archives/',
        'Dir::Cache::srcpkgcache' : 'srcpkgcache.bin',
        'Dir::Cache::pkgcache' : 'pkgcache.bin',
        'Dir::Etc' : 'apt/etc/', # etc/apt/
        'Dir::Etc::sourcelist' : 'sources.list',
        'Dir::Etc::vendorlist' : 'vendors.list',
        'Dir::Etc::vendorparts' : 'vendors.list.d',
        #'Dir::Etc::main' : 'apt.conf',
        #'Dir::Etc::parts' : 'apt.conf.d',
        #'Dir::Etc::preferences' : 'preferences',
        #'Dir::Bin::methods' : '', #'/usr/lib/apt/methods'
        'Dir::Bin::dpkg' : '/usr/bin/dpkg',
        #'DPkg::Pre-Install-Pkgs' : '',
        #'DPkg::Tools::Options' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges' : '',
        #'DPkg::Tools::Options::/usr/bin/apt-listchanges::Version' : '2',
        #'DPkg::Post-Invoke' : '',
        }
    # NOTE(review): the continuation line of this tuple was not visible in
    # the reviewed text; 'apt/lists/partial' is the directory _load()
    # recreates under the lists directory -- confirm against the original.
    essential_dirs = ('apt', 'apt/cache', 'apt/dpkg', 'apt/etc', 'apt/lists',
                      'apt/lists/partial')
    essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)

    def __init__(self, cache_dir):
        """Construct a new packages manager.

        @type cache_dir: L{twisted.python.filepath.FilePath}
        @param cache_dir: directory to use to store files for this mirror
        """
        self.cache_dir = cache_dir
        self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)

        # Create the necessary files and directories for apt
        for dir_name in self.essential_dirs:
            path = self.cache_dir.preauthChild(dir_name)
            if not path.exists():
                path.makedirs()
        for file_name in self.essential_files:
            path = self.cache_dir.preauthChild(file_name)
            if not path.exists():
                path.touch()

        self.apt_config['Dir'] = self.cache_dir.path
        # Replace the relative status path with its absolute location
        self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(
            self.apt_config['Dir::State']).preauthChild(
            self.apt_config['Dir::State::status']).path
        self.packages = PackageFileList(cache_dir)
        self.loaded = False
        self.loading = None
        self.unload_later = None

    # NOTE(review): two short lines between __init__ and addRelease were not
    # visible in the reviewed text; a destructor delegating to cleanup() is
    # assumed here -- confirm against the original.
    def __del__(self):
        self.cleanup()

    def _listsDir(self):
        """Return the directory holding apt's index lists for this mirror."""
        return self.cache_dir.preauthChild(self.apt_config['Dir::State']
                        ).preauthChild(self.apt_config['Dir::State::Lists'])

    def addRelease(self, cache_path, file_path):
        """Add a Release file's info to the list of index files.

        Dirty hack until python-apt supports apt-pkg/indexrecords.h

        @type cache_path: C{string}
        @param cache_path: the location of the Release file within the mirror
        @type file_path: L{twisted.python.filepath.FilePath}
        @param file_path: the location of the Release file in the file system
        """
        records = self.indexrecords[cache_path] = {}

        f = file_path.open('r')
        try:
            # Use python-debian routines to parse the file for hashes
            rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
            for hash_type in rel:
                for entry in rel[hash_type]:
                    # Stored as {index file: {HASH TYPE: (hash, size)}}
                    records.setdefault(entry['name'], {})[hash_type.upper()] = (entry[hash_type], entry['size'])
        finally:
            # Do not leak the handle if the Release file cannot be parsed
            f.close()

    def file_updated(self, cache_path, file_path):
        """A file in the mirror has changed or been added.

        If this affects us, unload our apt database.
        @see: L{PackageFileList.update_file}
        """
        if self.packages.update_file(cache_path, file_path):
            self.unload()

    def load(self):
        """Make sure the package cache is initialized and loaded.

        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred fired with the result of loading the cache
        """
        # Reset the pending unload call
        delay = config.gettime('DEFAULT', 'UNLOAD_PACKAGES_CACHE')
        if self.unload_later and self.unload_later.active():
            self.unload_later.reset(delay)
        else:
            self.unload_later = reactor.callLater(delay, self.unload)

        # Check if it's already loaded
        if self.loaded:
            return defer.succeed(True)

        # Make sure it's not already being loaded
        if self.loading is None:
            log.msg('Loading the packages cache')
            self.loading = threads.deferToThread(self._load)
            self.loading.addCallback(self.doneLoading)
        return self.loading

    def doneLoading(self, loadResult):
        """Cache is loaded."""
        self.loading = None
        # Must pass on the result for the next callback
        return loadResult

    def _load(self):
        """Regenerates the fake configuration and loads the packages caches.

        @rtype: C{boolean}
        @return: whether the caches are loaded; C{False} when there are no
            index files to load
        """
        if self.loaded:
            return True

        # Modify the default configuration to create the fake one.
        # NOTE(review): the statement following this comment was not visible
        # in the reviewed text; initializing apt here is an assumption.
        apt_pkg.InitSystem()
        lists_dir = self._listsDir()
        lists_dir.remove()
        lists_dir.child('partial').makedirs()
        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
                            ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
        sources = sources_file.open('w')
        try:
            sources_count = 0
            deb_src_added = False
            self.packages.check_files()
            self.indexrecords = {}

            # Create an entry in sources.list for each needed index file
            for f in self.packages:
                # we should probably clear old entries from self.packages and
                # take into account the recorded mtime as optimization
                index_file = self.packages[f]
                if f.split('/')[-1] == "Release":
                    self.addRelease(f, index_file)
                fake_uri='http://apt-p2p'+f
                fake_dirname = '/'.join(fake_uri.split('/')[:-1])
                if f.endswith('Sources'):
                    deb_src_added = True
                    source_line='deb-src '+fake_dirname+'/ /'
                else:
                    source_line='deb '+fake_dirname+'/ /'
                listpath = lists_dir.child(apt_pkg.URItoFileName(fake_uri))
                sources.write(source_line+'\n')
                log.msg("Sources line: " + source_line)
                sources_count += 1

                if listpath.exists():
                    #we should empty the directory instead
                    listpath.remove()
                # apt reads the index through a link named after the fake URI
                os.symlink(index_file.path, listpath.path)
        finally:
            # Close sources.list even if one of the index files is broken
            sources.close()

        if sources_count == 0:
            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
            return False

        log.msg("Loading Packages database for "+self.cache_dir.path)
        for key, value in self.apt_config.items():
            apt_pkg.Config[key] = value

        self.cache = apt_pkg.GetCache(OpProgress())
        self.records = apt_pkg.GetPkgRecords(self.cache)
        if deb_src_added:
            self.srcrecords = apt_pkg.GetPkgSrcRecords()
        else:
            self.srcrecords = None

        self.loaded = True
        return True

    def unload(self):
        """Tries to make the packages server quit."""
        if self.unload_later and self.unload_later.active():
            self.unload_later.cancel()
        self.unload_later = None
        if self.loaded:
            log.msg('Unloading the packages cache')
            # This should save memory
            del self.cache
            del self.records
            del self.srcrecords
            del self.indexrecords
            self.loaded = False

    def cleanup(self):
        """Cleanup and close any loaded caches."""
        self.unload()
        if self.unload_later and self.unload_later.active():
            self.unload_later.cancel()
        self.packages.close()

    def findHash(self, path):
        """Find the hash for a given path in this mirror.

        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @rtype: L{twisted.internet.defer.Deferred}
        @return: a deferred so it can make sure the cache is loaded first
        """
        d = defer.Deferred()

        deferLoad = self.load()
        deferLoad.addCallback(self._findHash, path, d)
        deferLoad.addErrback(self._findHash_error, path, d)

        return d

    def _findHash_error(self, failure, path, d):
        """An error occurred, return an empty hash.

        @return: the failure, so that any other lookups waiting on the same
            load also get their error handlers called
        """
        log.msg('An error occurred while looking up a hash for: %s' % path)
        # NOTE(review): logging the failure itself is assumed; the line was
        # not visible in the reviewed text.
        log.err(failure)
        d.callback(HashObject())
        return failure

    def _findHash(self, loadResult, path, d):
        """Search the records for the hash of a path.

        The index files are searched first, then the binary packages and
        finally the source packages. C{d} is always called back, with an
        empty hash object when nothing matches or the cache is not loaded.

        @type loadResult: C{boolean}
        @param loadResult: whether apt's cache was successfully loaded
        @type path: C{string}
        @param path: the path within the mirror of the file to lookup
        @type d: L{twisted.internet.defer.Deferred}
        @param d: the deferred to callback with the result
        @return: C{loadResult}, unchanged
        """
        h = HashObject()
        if loadResult and not self._setIndexHash(h, path):
            package = path.split('/')[-1].split('_')[0]
            if not self._setPkgHash(h, path, package):
                self._setSrcHash(h, path, package)
        d.callback(h)

        # Have to pass the returned loadResult on in case other calls to this function are pending.
        return loadResult

    def _setIndexHash(self, h, path):
        """Fill C{h} from the Release files' index records; return whether found."""
        for release in self.indexrecords:
            # Strip the trailing 'Release' to get the directory prefix
            prefix = release[:-7]
            if path.startswith(prefix):
                for indexFile in self.indexrecords[release]:
                    if prefix + indexFile == path:
                        h.setFromIndexRecord(self.indexrecords[release][indexFile])
                        return True
        return False

    def _setPkgHash(self, h, path, package):
        """Fill C{h} from the binary package records; return whether found."""
        # NOTE(review): 'version.Size' and the KeyError handler were not
        # visible in the reviewed text and are assumed -- confirm.
        try:
            for version in self.cache[package].VersionList:
                size = version.Size
                for verFile in version.FileList:
                    if self.records.Lookup(verFile):
                        if '/' + self.records.FileName == path:
                            h.setFromPkgRecord(self.records, size)
                            return True
        except KeyError:
            # The package is not in the cache
            pass
        return False

    def _setSrcHash(self, h, path, package):
        """Fill C{h} from the source package records; return whether found."""
        # srcrecords is None when no Sources index was loaded
        if self.srcrecords:
            self.srcrecords.Restart()
            if self.srcrecords.Lookup(package):
                for f in self.srcrecords.Files:
                    if path == '/' + f[2]:
                        h.setFromSrcRecord(f)
                        return True
        return False
class TestAptPackages(unittest.TestCase):
    """Unit tests for the AptPackages cache.

    The tests use the machine's own apt index files in /var/lib/apt/lists/
    and shell out to grep/cut to compute the expected values.
    """

    # NOTE(review): these index names hard-code the i386 architecture --
    # confirm the tests are only expected to run where that index exists.
    binaryIndex = 'main/binary-i386/Packages.bz2'
    sourceIndex = 'main/source/Sources.bz2'

    #{ Helpers
    def _run(self, command):
        """Run a shell pipeline and return everything it printed."""
        pipe = os.popen(command)
        try:
            return pipe.read()
        finally:
            pipe.close()

    def _cachePath(self, listsName):
        """Convert a file name from apt's lists directory to a mirror path."""
        return listsName[listsName.find('_dists_'):].replace('_','/')

    def _indexHash(self, indexFile):
        """Read the SHA1 hash of an index file from the Release file."""
        return self._run('grep -A 3000 -E "^SHA1:" ' +
                         '/var/lib/apt/lists/' + self.releaseFile +
                         ' | grep -E " ' + indexFile + '$"'
                         ' | head -n 1 | cut -d\\ -f 2').rstrip('\n')

    def _indexPath(self, indexFile):
        """Build the mirror path of an index file next to the Release file."""
        return '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + indexFile

    def _pkgField(self, field):
        """Read one field of the binary 'dpkg' package from the Packages file."""
        return self._run('grep -A 30 -E "^Package: dpkg$" ' +
                         '/var/lib/apt/lists/' + self.packagesFile +
                         ' | grep -E "^' + field + ':" | head -n 1' +
                         ' | cut -d\\ -f 2').rstrip('\n')

    def _srcDir(self):
        """Read the mirror directory of the 'dpkg' source package."""
        return '/' + self._run('grep -A 30 -E "^Package: dpkg$" ' +
                               '/var/lib/apt/lists/' + self.sourcesFile +
                               ' | grep -E "^Directory:" | head -n 1' +
                               ' | cut -d\\ -f 2').rstrip('\n')

    def _srcFiles(self, column):
        """Read one column of the 'dpkg' source package's Files: entries.

        Column 2 is used by the tests as the hash and column 4 as the
        file name.
        """
        return self._run('grep -A 20 -E "^Package: dpkg$" ' +
                         '/var/lib/apt/lists/' + self.sourcesFile +
                         ' | grep -A 4 -E "^Files:" | grep -E "^ " ' +
                         ' | cut -d\\ -f ' + str(column)).split('\n')[:-1]
    #}

    def setUp(self):
        """Initializes the cache with files found in the traditional apt location."""
        self.pending_calls = []
        self.releaseFile = ''
        self.client = AptPackages(FilePath('/tmp/.apt-p2p'))

        # Find the largest index files that are for 'main'
        self.packagesFile = self._run('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').rstrip('\n')
        self.sourcesFile = self._run('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').rstrip('\n')

        # Find the Release file corresponding to the found Packages file
        for f in next(os.walk('/var/lib/apt/lists'))[2]:
            if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                self.releaseFile = f
                break

        # Add all the found files to the PackageFileList
        for listsName in (self.releaseFile, self.packagesFile, self.sourcesFile):
            self.client.file_updated(self._cachePath(listsName),
                                     FilePath('/var/lib/apt/lists/' + listsName))

    def test_pkg_hash(self):
        """Tests loading the binary package records cache."""
        self.client._load()

        self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])

        pkg_hash = self._pkgField('SHA1')

        self.failUnless(self.client.records.SHA1Hash == pkg_hash,
                        "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))

    def test_src_hash(self):
        """Tests loading the source package records cache."""
        self.client._load()

        self.client.srcrecords.Lookup('dpkg')

        src_hashes = self._srcFiles(2)

        for f in self.client.srcrecords.Files:
            self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))

    def test_index_hash(self):
        """Tests loading the cache of index file information."""
        self.client._load()

        indexhash = self.client.indexrecords[self._cachePath(self.releaseFile)][self.binaryIndex]['SHA1'][0]

        idx_hash = self._indexHash(self.binaryIndex)

        self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))

    def verifyHash(self, found_hash, path, true_hash):
        """Check that a hash found by findHash matches the expected one."""
        self.failUnless(found_hash.hexexpected() == true_hash,
                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))

    def test_findIndexHash(self):
        """Tests finding the hash of a single index file."""
        lastDefer = defer.Deferred()

        idx_hash = self._indexHash(self.binaryIndex)
        idx_path = self._indexPath(self.binaryIndex)

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_findPkgHash(self):
        """Tests finding the hash of a single binary package."""
        lastDefer = defer.Deferred()

        pkg_hash = self._pkgField('SHA1')
        pkg_path = '/' + self._pkgField('Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_findSrcHash(self):
        """Tests finding the hash of a single source package."""
        lastDefer = defer.Deferred()

        src_dir = self._srcDir()
        src_hashes = self._srcFiles(2)
        src_paths = self._srcFiles(4)

        # Check one randomly chosen file of the source package
        i = choice(range(len(src_hashes)))
        d = self.client.findHash(src_dir + '/' + src_paths[i])
        d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])

        d.addBoth(lastDefer.callback)
        return lastDefer

    def test_multipleFindHash(self):
        """Tests finding the hash of an index file, binary package, source package, and another index file."""
        lastDefer = defer.Deferred()

        # Lookup a Packages.bz2 file
        idx_hash = self._indexHash(self.binaryIndex)
        idx_path = self._indexPath(self.binaryIndex)

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Lookup the binary 'dpkg' package
        pkg_hash = self._pkgField('SHA1')
        pkg_path = '/' + self._pkgField('Filename')

        d = self.client.findHash(pkg_path)
        d.addCallback(self.verifyHash, pkg_path, pkg_hash)

        # Lookup the source 'dpkg' package
        src_dir = self._srcDir()
        src_hashes = self._srcFiles(2)
        src_paths = self._srcFiles(4)

        for i in range(len(src_hashes)):
            d = self.client.findHash(src_dir + '/' + src_paths[i])
            d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])

        # Lookup a Sources.bz2 file
        idx_hash = self._indexHash(self.sourceIndex)
        idx_path = self._indexPath(self.sourceIndex)

        d = self.client.findHash(idx_path)
        d.addCallback(self.verifyHash, idx_path, idx_hash)

        # Only the last lookup is chained to the deferred the test returns
        d.addBoth(lastDefer.callback)
        return lastDefer

    def tearDown(self):
        """Cancel any still-pending calls and close the cache."""
        for p in self.pending_calls:
            if p.active():
                p.cancel()
        self.pending_calls = []
        self.client.cleanup()