diff --git a/apt_dht/AptPackages.py b/apt_dht/AptPackages.py
index 8ebcf91..45c035f 100644
--- a/apt_dht/AptPackages.py
+++ b/apt_dht/AptPackages.py
@@ -1,3 +1,26 @@
+#
+# Copyright (C) 2002 Manuel Estrada Sainz
+# Copyright (C) 2008 Cameron Dale
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+"""Manage a mirror's index files.
+
+@type TRACKED_FILES: C{list} of C{string}
+@var TRACKED_FILES: the file names of files that contain index information
+"""
+
 # Disable the FutureWarning from the apt module
 import warnings
 warnings.simplefilter("ignore", FutureWarning)
@@ -8,33 +31,43 @@ from shutil import rmtree
 from copy import deepcopy
 from UserDict import DictMixin
 
-from twisted.internet import threads, defer
+from twisted.internet import threads, defer, reactor
 from twisted.python import log
+from twisted.python.filepath import FilePath
 from twisted.trial import unittest
 
 import apt_pkg, apt_inst
 from apt import OpProgress
+from debian_bundle import deb822
+
+from Hash import HashObject
 
 apt_pkg.init()
 
+TRACKED_FILES = ['release', 'sources', 'packages']
+
 class PackageFileList(DictMixin):
-    """Manages a list of package files belonging to a backend.
+    """Manages a list of index files belonging to a mirror.
 
+    @type cache_dir: L{twisted.python.filepath.FilePath}
+    @ivar cache_dir: the directory to use for storing all files
     @type packages: C{shelve dictionary}
-    @ivar packages: the files stored for this backend
+    @ivar packages: the files tracked for this mirror
     """
 
     def __init__(self, cache_dir):
+        """Initialize the list by opening the dictionary."""
         self.cache_dir = cache_dir
-        if not os.path.exists(self.cache_dir):
-            os.makedirs(self.cache_dir)
+        self.cache_dir.restat(False)
+        if not self.cache_dir.exists():
+            self.cache_dir.makedirs()
         self.packages = None
         self.open()
 
     def open(self):
-        """Open the persistent dictionary of files in this backend."""
+        """Open the persistent dictionary of files for this mirror."""
         if self.packages is None:
-            self.packages = shelve.open(self.cache_dir+'/packages.db')
+            self.packages = shelve.open(self.cache_dir.child('packages.db').path)
 
     def close(self):
         """Close the persistent dictionary."""
@@ -46,32 +79,70 @@ class PackageFileList(DictMixin):
 
         Called from the mirror manager when files get updated so we can
        update our fake lists and sources.list.
+
+        @type cache_path: C{string}
+        @param cache_path: the location of the file within the mirror
+        @type file_path: L{twisted.python.filepath.FilePath}
+        @param file_path: The location of the file in the file system
+        @rtype: C{boolean}
+        @return: whether the file is an index file
         """
         filename = cache_path.split('/')[-1]
-        if filename=="Packages" or filename=="Release" or filename=="Sources":
+        if filename.lower() in TRACKED_FILES:
             log.msg("Registering package file: "+cache_path)
             self.packages[cache_path] = file_path
             return True
         return False
 
     def check_files(self):
-        """Check all files in the database to make sure they exist."""
+        """Check all files in the database to remove any that don't exist."""
         files = self.packages.keys()
         for f in files:
-            if not os.path.exists(self.packages[f]):
+            self.packages[f].restat(False)
+            if not self.packages[f].exists():
                 log.msg("File in packages database has been deleted: "+f)
                 del self.packages[f]
 
-    # Standard dictionary implementation so this class can be used like a dictionary.
+    #{ Dictionary interface details
     def __getitem__(self, key): return self.packages[key]
     def __setitem__(self, key, item): self.packages[key] = item
     def __delitem__(self, key): del self.packages[key]
     def keys(self): return self.packages.keys()
 
 class AptPackages:
-    """Uses python-apt to answer queries about packages.
-
-    Makes a fake configuration for python-apt for each backend.
+    """Answers queries about packages available from a mirror.
+
+    Uses the python-apt tools to parse and provide information about the
+    files that are available on a single mirror.
+
+    @ivar DEFAULT_APT_CONFIG: the default configuration parameters to use for apt
+    @ivar essential_dirs: directories that must be created for apt to work
+    @ivar essential_files: files that must be created for apt to work
+    @type cache_dir: L{twisted.python.filepath.FilePath}
+    @ivar cache_dir: the directory to use for storing all files
+    @type unload_delay: C{int}
+    @ivar unload_delay: the time to wait before unloading the apt cache
+    @ivar apt_config: the configuration parameters to use for apt
+    @type packages: L{PackageFileList}
+    @ivar packages: the persistent storage of tracked apt index files
+    @type loaded: C{boolean}
+    @ivar loaded: whether the apt cache is currently loaded
+    @type loading: L{twisted.internet.defer.Deferred}
+    @ivar loading: if the cache is currently being loaded, this will be
+        called when it is loaded, otherwise it is None
+    @type unload_later: L{twisted.internet.interfaces.IDelayedCall}
+    @ivar unload_later: the delayed call to unload the apt cache
+    @type indexrecords: C{dictionary}
+    @ivar indexrecords: the hashes of index files for the mirror, keys are
+        mirror directories, values are dictionaries with keys the path to the
+        index file in the mirror directory and values are dictionaries with
+        keys the hash type and values the hash
+    @type cache: C{apt_pkg.GetCache()}
+    @ivar cache: the apt cache of the mirror
+    @type records: C{apt_pkg.GetPkgRecords()}
+    @ivar records: the apt package records for all binary packages in a mirror
+    @type srcrecords: C{apt_pkg.GetPkgSrcRecords}
+    @ivar srcrecords: the apt package records for all source packages in a mirror
     """
 
     DEFAULT_APT_CONFIG = {
@@ -110,82 +181,75 @@ class AptPackages:
                        'apt/lists/partial')
     essential_files = ('apt/dpkg/status', 'apt/etc/sources.list',)
 
-    def __init__(self, cache_dir):
+    def __init__(self, cache_dir, unload_delay):
         """Construct a new packages manager.
 
-        @ivar backendName: name of backend associated with this packages file
-        @ivar cache_dir: cache directory from config file
+        @param cache_dir: directory to use to store files for this mirror
         """
         self.cache_dir = cache_dir
+        self.unload_delay = unload_delay
         self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)
 
+        # Create the necessary files and directories for apt
         for dir in self.essential_dirs:
-            path = os.path.join(self.cache_dir, dir)
-            if not os.path.exists(path):
-                os.makedirs(path)
+            path = self.cache_dir.preauthChild(dir)
+            if not path.exists():
+                path.makedirs()
         for file in self.essential_files:
-            path = os.path.join(self.cache_dir, file)
-            if not os.path.exists(path):
-                f = open(path,'w')
-                f.close()
-                del f
+            path = self.cache_dir.preauthChild(file)
+            if not path.exists():
+                path.touch()
 
-        self.apt_config['Dir'] = self.cache_dir
-        self.apt_config['Dir::State::status'] = os.path.join(self.cache_dir,
-            self.apt_config['Dir::State'], self.apt_config['Dir::State::status'])
+        self.apt_config['Dir'] = self.cache_dir.path
+        self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path
         self.packages = PackageFileList(cache_dir)
-        self.loaded = 0
+        self.loaded = False
         self.loading = None
+        self.unload_later = None
 
     def __del__(self):
         self.cleanup()
         self.packages.close()
 
     def addRelease(self, cache_path, file_path):
-        """Dirty hack until python-apt supports apt-pkg/indexrecords.h
+        """Add a Release file's info to the list of index files.
+
+        Dirty hack until python-apt supports apt-pkg/indexrecords.h
         (see Bug #456141)
         """
         self.indexrecords[cache_path] = {}
 
         read_packages = False
-        f = open(file_path, 'r')
+        f = file_path.open('r')
 
-        for line in f:
-            line = line.rstrip()
-
-            if line[:1] != " ":
-                read_packages = False
-                try:
-                    # Read the various headers from the file
-                    h, v = line.split(":", 1)
-                    if h == "MD5Sum" or h == "SHA1" or h == "SHA256":
-                        read_packages = True
-                        hash_type = h
-                except:
-                    # Bad header line, just ignore it
-                    log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
-
-                # Skip to the next line
-                continue
+        # Use python-debian routines to parse the file for hashes
+        rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
+        for hash_type in rel:
+            for file in rel[hash_type]:
+                self.indexrecords[cache_path].setdefault(file['name'], {})[hash_type.upper()] = (file[hash_type], file['size'])
 
-            # Read file names from the multiple hash sections of the file
-            if read_packages:
-                p = line.split()
-                self.indexrecords[cache_path].setdefault(p[2], {})[hash_type] = (p[0], p[1])
-
         f.close()
 
     def file_updated(self, cache_path, file_path):
-        """A file in the backend has changed, manage it.
+        """A file in the mirror has changed or been added.
 
-        If this affects us, unload our apt database
+        If this affects us, unload our apt database.
+
+        @see: L{PackageFileList.update_file}
         """
         if self.packages.update_file(cache_path, file_path):
             self.unload()
 
     def load(self):
-        """Make sure the package is initialized and loaded."""
+        """Make sure the package cache is initialized and loaded."""
+        # Reset the pending unload call
+        if self.unload_later and self.unload_later.active():
+            self.unload_later.reset(self.unload_delay)
+        else:
+            self.unload_later = reactor.callLater(self.unload_delay, self.unload)
+
+        # Make sure it's not already being loaded
         if self.loading is None:
+            log.msg('Loading the packages cache')
             self.loading = threads.deferToThread(self._load)
             self.loading.addCallback(self.doneLoading)
         return self.loading
@@ -197,69 +261,82 @@ class AptPackages:
         return loadResult
 
     def _load(self):
-        """Regenerates the fake configuration and load the packages cache."""
+        """Regenerates the fake configuration and loads the packages caches."""
         if self.loaded: return True
+
+        # Modify the default configuration to create the fake one.
         apt_pkg.InitSystem()
-        rmtree(os.path.join(self.cache_dir, self.apt_config['Dir::State'],
-                            self.apt_config['Dir::State::Lists']))
-        os.makedirs(os.path.join(self.cache_dir, self.apt_config['Dir::State'],
-                                 self.apt_config['Dir::State::Lists'], 'partial'))
-        sources_filename = os.path.join(self.cache_dir, self.apt_config['Dir::Etc'],
-                                        self.apt_config['Dir::Etc::sourcelist'])
-        sources = open(sources_filename, 'w')
+        self.cache_dir.preauthChild(self.apt_config['Dir::State']
+                     ).preauthChild(self.apt_config['Dir::State::Lists']).remove()
+        self.cache_dir.preauthChild(self.apt_config['Dir::State']
+                     ).preauthChild(self.apt_config['Dir::State::Lists']
+                     ).child('partial').makedirs()
+        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
+                     ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
+        sources = sources_file.open('w')
         sources_count = 0
+        deb_src_added = False
         self.packages.check_files()
         self.indexrecords = {}
+
+        # Create an entry in sources.list for each needed index file
         for f in self.packages:
             # we should probably clear old entries from self.packages and
             # take into account the recorded mtime as optimization
-            filepath = self.packages[f]
+            file = self.packages[f]
             if f.split('/')[-1] == "Release":
-                self.addRelease(f, filepath)
+                self.addRelease(f, file)
             fake_uri='http://apt-dht'+f
             fake_dirname = '/'.join(fake_uri.split('/')[:-1])
             if f.endswith('Sources'):
+                deb_src_added = True
                 source_line='deb-src '+fake_dirname+'/ /'
             else:
                 source_line='deb '+fake_dirname+'/ /'
-            listpath=(os.path.join(self.cache_dir, self.apt_config['Dir::State'],
-                                   self.apt_config['Dir::State::Lists'],
-                                   apt_pkg.URItoFileName(fake_uri)))
+            listpath = self.cache_dir.preauthChild(self.apt_config['Dir::State']
+                                    ).preauthChild(self.apt_config['Dir::State::Lists']
+                                    ).child(apt_pkg.URItoFileName(fake_uri))
             sources.write(source_line+'\n')
             log.msg("Sources line: " + source_line)
             sources_count = sources_count + 1
 
-            try:
+            if listpath.exists():
                 #we should empty the directory instead
-                os.unlink(listpath)
-            except:
-                pass
-            os.symlink(filepath, listpath)
+                listpath.remove()
+            os.symlink(file.path, listpath.path)
         sources.close()
 
         if sources_count == 0:
-            log.msg("No Packages files available for %s backend"%(self.cache_dir))
+            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
             return False
 
-        log.msg("Loading Packages database for "+self.cache_dir)
+        log.msg("Loading Packages database for "+self.cache_dir.path)
         for key, value in self.apt_config.items():
             apt_pkg.Config[key] = value
         self.cache = apt_pkg.GetCache(OpProgress())
         self.records = apt_pkg.GetPkgRecords(self.cache)
-        self.srcrecords = apt_pkg.GetPkgSrcRecords()
+        if deb_src_added:
+            self.srcrecords = apt_pkg.GetPkgSrcRecords()
+        else:
+            self.srcrecords = None
 
-        self.loaded = 1
+        self.loaded = True
         return True
 
     def unload(self):
         """Tries to make the packages server quit."""
+        if self.unload_later and self.unload_later.active():
+            self.unload_later.cancel()
+        self.unload_later = None
         if self.loaded:
+            log.msg('Unloading the packages cache')
+            # This should save memory
             del self.cache
             del self.records
             del self.srcrecords
             del self.indexrecords
-            self.loaded = 0
+            self.loaded = False
 
     def cleanup(self):
         """Cleanup and close any loaded caches."""
@@ -269,31 +346,49 @@ class AptPackages:
     def findHash(self, path):
         """Find the hash for a given path in this mirror.
 
-        Returns a deferred so it can make sure the cache is loaded first.
+        @type path: C{string}
+        @param path: the path within the mirror of the file to lookup
+        @rtype: L{twisted.internet.defer.Deferred}
+        @return: a deferred so it can make sure the cache is loaded first
         """
         d = defer.Deferred()
 
         deferLoad = self.load()
         deferLoad.addCallback(self._findHash, path, d)
+        deferLoad.addErrback(self._findHash_error, path, d)
 
         return d
 
+    def _findHash_error(self, failure, path, d):
+        """An error occurred, return an empty hash."""
+        log.msg('An error occurred while looking up a hash for: %s' % path)
+        log.err(failure)
+        d.callback(HashObject())
+        return failure
+
     def _findHash(self, loadResult, path, d):
-        """Really find the hash for a path.
+        """Search the records for the hash of a path.
 
-        Have to pass the returned loadResult on in case other calls to this
-        function are pending.
+        @type loadResult: C{boolean}
+        @param loadResult: whether apt's cache was successfully loaded
+        @type path: C{string}
+        @param path: the path within the mirror of the file to lookup
+        @type d: L{twisted.internet.defer.Deferred}
+        @param d: the deferred to callback with the result
         """
         if not loadResult:
-            d.callback((None, None))
+            d.callback(HashObject())
             return loadResult
 
+        h = HashObject()
+
+        # First look for the path in the cache of index files
         for release in self.indexrecords:
            if path.startswith(release[:-7]):
                 for indexFile in self.indexrecords[release]:
                     if release[:-7] + indexFile == path:
-                        d.callback(self.indexrecords[release][indexFile]['SHA1'])
+                        h.setFromIndexRecord(self.indexrecords[release][indexFile])
+                        d.callback(h)
                         return loadResult
 
         package = path.split('/')[-1].split('_')[0]
@@ -305,20 +400,25 @@
                 for verFile in version.FileList:
                     if self.records.Lookup(verFile):
                         if '/' + self.records.FileName == path:
-                            d.callback((self.records.SHA1Hash, size))
+                            h.setFromPkgRecord(self.records, size)
+                            d.callback(h)
                             return loadResult
         except KeyError:
             pass
 
         # Check the source packages' files
-        self.srcrecords.Restart()
-        if self.srcrecords.Lookup(package):
-            for f in self.srcrecords.Files:
-                if path == '/' + f[2]:
-                    d.callback((f[0], f[1]))
-                    return loadResult
+        if self.srcrecords:
+            self.srcrecords.Restart()
+            if self.srcrecords.Lookup(package):
+                for f in self.srcrecords.Files:
+                    if path == '/' + f[2]:
+                        h.setFromSrcRecord(f)
+                        d.callback(h)
+                        return loadResult
+
+        d.callback(h)
 
-        d.callback((None, None))
+        # Have to pass the returned loadResult on in case other calls to this function are pending.
         return loadResult
 
 class TestAptPackages(unittest.TestCase):
@@ -332,23 +432,29 @@ class TestAptPackages(unittest.TestCase):
     releaseFile = ''
 
     def setUp(self):
-        self.client = AptPackages('/tmp/.apt-dht')
+        """Initializes the cache with files found in the traditional apt location."""
+        self.client = AptPackages(FilePath('/tmp/.apt-dht'), 300)
+
+        # Find the largest index files that are for 'main'
         self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
         self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
+
+        # Find the Release file corresponding to the found Packages file
         for f in os.walk('/var/lib/apt/lists').next()[2]:
             if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
                 self.releaseFile = f
                 break
-
+
+        # Add all the found files to the PackageFileList
         self.client.file_updated(self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/'),
-                                 '/var/lib/apt/lists/' + self.releaseFile)
+                                 FilePath('/var/lib/apt/lists/' + self.releaseFile))
         self.client.file_updated(self.packagesFile[self.packagesFile.find('_dists_'):].replace('_','/'),
-                                 '/var/lib/apt/lists/' + self.packagesFile)
+                                 FilePath('/var/lib/apt/lists/' + self.packagesFile))
         self.client.file_updated(self.sourcesFile[self.sourcesFile.find('_dists_'):].replace('_','/'),
-                                 '/var/lib/apt/lists/' + self.sourcesFile)
+                                 FilePath('/var/lib/apt/lists/' + self.sourcesFile))
 
     def test_pkg_hash(self):
+        """Tests loading the binary package records cache."""
         self.client._load()
 
         self.client.records.Lookup(self.client.cache['dpkg'].VersionList[0].FileList[0])
@@ -362,6 +468,7 @@ class TestAptPackages(unittest.TestCase):
                         "Hashes don't match: %s != %s" % (self.client.records.SHA1Hash, pkg_hash))
 
     def test_src_hash(self):
+        """Tests loading the source package records cache."""
         self.client._load()
 
         self.client.srcrecords.Lookup('dpkg')
@@ -375,6 +482,7 @@ class TestAptPackages(unittest.TestCase):
             self.failUnless(f[0] in src_hashes, "Couldn't find %s in: %r" % (f[0], src_hashes))
 
     def test_index_hash(self):
+        """Tests loading the cache of index file information."""
         self.client._load()
 
         indexhash = self.client.indexrecords[self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/')]['main/binary-i386/Packages.bz2']['SHA1'][0]
@@ -387,10 +495,11 @@ class TestAptPackages(unittest.TestCase):
         self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))
 
     def verifyHash(self, found_hash, path, true_hash):
-        self.failUnless(found_hash[0] == true_hash,
-                    "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash))
+        self.failUnless(found_hash.hexexpected() == true_hash,
+                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
 
     def test_findIndexHash(self):
+        """Tests finding the hash of a single index file."""
         lastDefer = defer.Deferred()
 
         idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
@@ -406,6 +515,7 @@ class TestAptPackages(unittest.TestCase):
         return lastDefer
 
     def test_findPkgHash(self):
+        """Tests finding the hash of a single binary package."""
         lastDefer = defer.Deferred()
 
         pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
@@ -424,6 +534,7 @@ class TestAptPackages(unittest.TestCase):
         return lastDefer
 
     def test_findSrcHash(self):
+        """Tests finding the hash of a single source package."""
         lastDefer = defer.Deferred()
 
         src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
@@ -447,8 +558,10 @@ class TestAptPackages(unittest.TestCase):
         return lastDefer
 
     def test_multipleFindHash(self):
+        """Tests finding the hash of an index file, binary package, source package, and another index file."""
         lastDefer = defer.Deferred()
 
+        # Lookup a Packages.bz2 file
         idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                             '/var/lib/apt/lists/' + self.releaseFile +
                             ' | grep -E " main/binary-i386/Packages.bz2$"'
@@ -458,6 +571,7 @@ class TestAptPackages(unittest.TestCase):
         d = self.client.findHash(idx_path)
         d.addCallback(self.verifyHash, idx_path, idx_hash)
 
+        # Lookup the binary 'dpkg' package
         pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                             '/var/lib/apt/lists/' + self.packagesFile +
                             ' | grep -E "^SHA1:" | head -n 1' +
@@ -470,6 +584,7 @@ class TestAptPackages(unittest.TestCase):
         d = self.client.findHash(pkg_path)
         d.addCallback(self.verifyHash, pkg_path, pkg_hash)
 
+        # Lookup the source 'dpkg' package
         src_dir = '/' + os.popen('grep -A 30 -E "^Package: dpkg$" ' +
                             '/var/lib/apt/lists/' + self.sourcesFile +
                             ' | grep -E "^Directory:" | head -n 1' +
@@ -487,6 +602,7 @@ class TestAptPackages(unittest.TestCase):
             d = self.client.findHash(src_dir + '/' + src_paths[i])
             d.addCallback(self.verifyHash, src_dir + '/' + src_paths[i], src_hashes[i])
 
+        # Lookup a Sources.bz2 file
         idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
                             '/var/lib/apt/lists/' + self.releaseFile +
                             ' | grep -E " main/source/Sources.bz2$"'
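
A minimal usage sketch, not part of the patch above: it shows how a caller might consume the reworked findHash() API, whose Deferred now fires with a HashObject (read back via hexexpected(), as in the tests) rather than a (hash, size) tuple. The cache directory, the 300-second unload delay, the looked-up path, the package-local import, and running it as a standalone script are illustrative assumptions, not taken from the commit.

# Sketch only -- assumed values are marked in the comments below.
from twisted.internet import reactor
from twisted.python.filepath import FilePath

from AptPackages import AptPackages  # assumed package-local import, as with Hash above

def got_hash(h, path):
    # An empty expected hash means the path was not found in the mirror's indexes.
    if h.hexexpected():
        print path, h.hexexpected()
    else:
        print path, 'not found in the mirror indexes'
    reactor.stop()

# Illustrative cache directory and unload delay (the tests above use the same values).
apt_cache = AptPackages(FilePath('/tmp/.apt-dht'), 300)
lookup_path = '/dists/unstable/main/binary-i386/Packages.bz2'  # hypothetical mirror path
apt_cache.findHash(lookup_path).addCallback(got_hash, lookup_path)
reactor.run()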