+"""Manage the multiple mirrors that may be requested.
+
+@var aptpkg_dir: the name of the directory to use for mirror files
+"""
+
from urlparse import urlparse
import os
-from twisted.python import log, filepath
+from twisted.python import log
+from twisted.python.filepath import FilePath
from twisted.internet import defer
from twisted.trial import unittest
-from twisted.web2 import stream
from twisted.web2.http import splitHostPort
from AptPackages import AptPackages
-aptpkg_dir='.apt-dht'
+aptpkg_dir='apt-packages'
class MirrorError(Exception):
"""Exception raised when there's a problem with the mirror."""
-class ProxyFileStream(stream.SimpleStream):
- """Saves a stream to a file while providing a new stream."""
-
- def __init__(self, stream, outFile):
- """Initializes the proxy.
-
- @type stream: C{twisted.web2.stream.IByteStream}
- @param stream: the input stream to read from
- @type outFile: C{twisted.python.filepath.FilePath}
- @param outFile: the file to write to
- """
- self.stream = stream
- self.outFile = outFile.open('w')
- self.length = self.stream.length
- self.start = 0
-
- def _done(self):
- """Close the output file."""
- self.outFile.close()
-
- def read(self):
- """Read some data from the stream."""
- if self.outFile.closed:
- return None
-
- data = self.stream.read()
- if isinstance(data, defer.Deferred):
- data.addCallbacks(self._write, self._done)
- return data
-
- self._write(data)
- return data
-
- def _write(self, data):
- """Write the stream data to the file and return it for others to use."""
- if data is None:
- self._done()
- return data
-
- self.outFile.write(data)
- return data
-
- def close(self):
- """Clean everything up and return None to future reads."""
- self.length = 0
- self._done()
- self.stream.close()
-
class MirrorManager:
- """Manages all requests for mirror objects."""
+ """Manages all requests for mirror information.
- def __init__(self, cache_dir):
+ @type cache_dir: L{twisted.python.filepath.FilePath}
+ @ivar cache_dir: the directory to use for storing all files
+ @type unload_delay: C{int}
+ @ivar unload_delay: the time to wait before unloading the apt cache
+ @type apt_caches: C{dictionary}
+ @ivar apt_caches: the avaliable mirrors
+ """
+
+ def __init__(self, cache_dir, unload_delay):
self.cache_dir = cache_dir
- self.cache = filepath.FilePath(self.cache_dir)
+ self.unload_delay = unload_delay
self.apt_caches = {}
def extractPath(self, url):
+ """Break the full URI down into the site, base directory and path.
+
+ Site is the host and port of the mirror. Base directory is the
+ directory to the mirror location (usually just '/debian'). Path is
+ the remaining path to get to the file.
+
+ E.g. http://ftp.debian.org/debian/dists/sid/binary-i386/Packages.bz2
+ would return ('ftp.debian.org:80', '/debian',
+ '/dists/sid/binary-i386/Packages.bz2').
+
+ @param url: the URI of the file's location on the mirror
+ @rtype: (C{string}, C{string}, C{string})
+ @return: the site, base directory and path to the file
+ """
+ # Extract the host and port
parsed = urlparse(url)
host, port = splitHostPort(parsed[0], parsed[1])
site = host + ":" + str(port)
path = parsed[2]
-
+
+ # Try to find the base directory (most can be found this way)
i = max(path.rfind('/dists/'), path.rfind('/pool/'))
if i >= 0:
baseDir = path[:i]
else:
# Uh oh, this is not good
log.msg("Couldn't find a good base directory for path: %s" % (site + path))
+
+ # Try to find an existing cache that starts with this one
+ # (fallback to using an empty base directory)
baseDir = ''
if site in self.apt_caches:
longest_match = 0
baseDir = base_match
log.msg("Settled on baseDir: %s" % baseDir)
- log.msg("Parsing '%s' gave '%s', '%s', '%s'" % (url, site, baseDir, path))
return site, baseDir, path
def init(self, site, baseDir):
+ """Make sure an L{AptPackages} exists for this mirror."""
if site not in self.apt_caches:
self.apt_caches[site] = {}
if baseDir not in self.apt_caches[site]:
- site_cache = os.path.join(self.cache_dir, aptpkg_dir, 'mirrors', site + baseDir.replace('/', '_'))
- self.apt_caches[site][baseDir] = AptPackages(site_cache)
+ site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
+ site_cache.makedirs
+ self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay)
def updatedFile(self, url, file_path):
+ """A file in the mirror has changed or been added.
+
+ @see: L{AptPackages.PackageFileList.update_file}
+ """
site, baseDir, path = self.extractPath(url)
self.init(site, baseDir)
self.apt_caches[site][baseDir].file_updated(path, file_path)
-
+
def findHash(self, url):
- log.msg('Trying to find hash for %s' % url)
+ """Find the hash for a given url.
+
+ @param url: the URI of the file's location on the mirror
+ @rtype: L{twisted.internet.defer.Deferred}
+ @return: a deferred that will fire with the returned L{Hash.HashObject}
+ """
site, baseDir, path = self.extractPath(url)
if site in self.apt_caches and baseDir in self.apt_caches[site]:
return self.apt_caches[site][baseDir].findHash(path)
d = defer.Deferred()
d.errback(MirrorError("Site Not Found"))
return d
-
- def save_file(self, response, hash, size, url):
- """Save a downloaded file to the cache and stream it."""
- log.msg('Returning file: %s' % url)
-
- parsed = urlparse(url)
- destFile = self.cache.preauthChild(parsed[1] + parsed[2])
- log.msg('Cache file: %s' % destFile.path)
-
- if destFile.exists():
- log.err('File already exists: %s', destFile.path)
- d.callback(response)
- return
-
- destFile.parent().makedirs()
- log.msg('Saving returned %i byte file to: %s' % (response.stream.length, destFile.path))
-
- orig_stream = response.stream
- response.stream = ProxyFileStream(orig_stream, destFile)
- return response
-
- def save_error(self, failure, url):
- """An error has occurred in downloadign or saving the file."""
- log.msg('Error occurred downloading %s' % url)
- log.err(failure)
- return failure
-
+
class TestMirrorManager(unittest.TestCase):
"""Unit tests for the mirror manager."""
client = None
def setUp(self):
- self.client = MirrorManager('/tmp')
+ self.client = MirrorManager(FilePath('/tmp/.apt-dht'), 300)
def test_extractPath(self):
+ """Test extracting the site and base directory from various mirrors."""
site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
self.failUnless(site == "ftp.us.debian.org:80", "no match: %s" % site)
self.failUnless(baseDir == "/debian", "no match: %s" % baseDir)
self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
def verifyHash(self, found_hash, path, true_hash):
- self.failUnless(found_hash[0] == true_hash,
- "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash))
+ self.failUnless(found_hash.hexexpected() == true_hash,
+ "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
def test_findHash(self):
+ """Tests finding the hash of an index file, binary package, source package, and another index file."""
+ # Find the largest index files that are for 'main'
self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
+
+ # Find the Release file corresponding to the found Packages file
for f in os.walk('/var/lib/apt/lists').next()[2]:
if f[-7:] == "Release" and self.packagesFile.startswith(f[:-7]):
self.releaseFile = f
break
+ # Add all the found files to the mirror
self.client.updatedFile('http://' + self.releaseFile.replace('_','/'),
- '/var/lib/apt/lists/' + self.releaseFile)
+ FilePath('/var/lib/apt/lists/' + self.releaseFile))
self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'),
- '/var/lib/apt/lists/' + self.packagesFile)
+ FilePath('/var/lib/apt/lists/' + self.packagesFile))
self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'),
- '/var/lib/apt/lists/' + self.sourcesFile)
+ FilePath('/var/lib/apt/lists/' + self.sourcesFile))
lastDefer = defer.Deferred()
+ # Lookup a Packages.bz2 file
idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
'/var/lib/apt/lists/' + self.releaseFile +
' | grep -E " main/binary-i386/Packages.bz2$"'
d = self.client.findHash(idx_path)
d.addCallback(self.verifyHash, idx_path, idx_hash)
+ # Lookup the binary 'dpkg' package
pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
'/var/lib/apt/lists/' + self.packagesFile +
' | grep -E "^SHA1:" | head -n 1' +
d = self.client.findHash(pkg_path)
d.addCallback(self.verifyHash, pkg_path, pkg_hash)
+ # Lookup the source 'dpkg' package
src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' +
'/var/lib/apt/lists/' + self.sourcesFile +
' | grep -E "^Directory:" | head -n 1' +
d = self.client.findHash(src_path)
d.addCallback(self.verifyHash, src_path, src_hashes[i])
+ # Lookup a Sources.bz2 file
idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' +
'/var/lib/apt/lists/' + self.releaseFile +
' | grep -E " main/source/Sources.bz2$"'