+from bz2 import BZ2Decompressor
+from zlib import decompressobj, MAX_WBITS
+from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT
from urlparse import urlparse
import os
# NOTE(review): not referenced in the visible code; presumably the name of the
# directory used for apt-dht's own package data -- confirm against its users.
aptpkg_dir='.apt-dht'
# Compressed extensions save_file() knows how to decompress while saving.
+DECOMPRESS_EXTS = ['.gz', '.bz2']
# Lower-cased base names (extension removed) of the files save_file() will
# also store in decompressed form.
+DECOMPRESS_FILES = ['release', 'sources', 'packages']
+
class MirrorError(Exception):
    """Exception raised when there's a problem with the mirror.

    findHash() wraps one of these ("Site Not Found") in a failed Deferred
    when the requested site/base directory has no cache.
    """
class ProxyFileStream(stream.SimpleStream):
"""Saves a stream to a file while providing a new stream."""
def __init__(self, stream, outFile, hash, decompress = None, decFile = None):
    """Set up the proxy: open the output file(s) and start a fresh hash.

    @type stream: C{twisted.web2.stream.IByteStream}
    @param stream: the stream the data is read from
    @type outFile: C{twisted.python.filepath.FilePath}
    @param outFile: where the raw stream data gets written
    @type hash: L{Hash.HashObject}
    @param hash: hash object for the file; C{new()} is called on it here
    @type decompress: C{string}
    @param decompress: if given, also decompress the data as this type
        (only '.gz' and '.bz2' are acted on, anything else is ignored)
    @type decFile: C{twisted.python.filepath.FilePath}
    @param decFile: where the decompressed data gets written; only
        touched when C{decompress} is '.gz' or '.bz2'
    """
    # Plain state with no side effects.
    self.stream = stream
    self.start = 0
    self.gzfile = self.bz2file = None

    # Raw copy of the stream plus its running hash.
    self.outFile = outFile.open('w')
    self.hash = hash
    hash.new()

    # Optional on-the-fly decompression to a second file.
    if decompress == ".gz":
        # The gzip header is stripped by hand from the first chunk, so the
        # decompressor is created for a raw deflate stream (negative wbits).
        self.gzheader = True
        self.gzfile = decFile.open('w')
        self.gzdec = decompressobj(-MAX_WBITS)
    elif decompress == ".bz2":
        self.bz2file = decFile.open('w')
        self.bz2dec = BZ2Decompressor()

    self.length = stream.length
    # Fired by _done() with the hash object once everything is written.
    self.doneDefer = defer.Deferred()
def _done(self):
"""Close the output file."""
- self.outFile.close()
+ if not self.outFile.closed:
+ self.outFile.close()
+ self.hash.digest()
+ if self.gzfile:
+ data_dec = self.gzdec.flush()
+ self.gzfile.write(data_dec)
+ self.gzfile.close()
+ self.gzfile = None
+ if self.bz2file:
+ self.bz2file.close()
+ self.bz2file = None
+
+ self.doneDefer.callback(self.hash)
def read(self):
"""Read some data from the stream."""
return data
self.outFile.write(data)
+ self.hash.update(data)
+ if self.gzfile:
+ if self.gzheader:
+ self.gzheader = False
+ new_data = self._remove_gzip_header(data)
+ dec_data = self.gzdec.decompress(new_data)
+ else:
+ dec_data = self.gzdec.decompress(data)
+ self.gzfile.write(dec_data)
+ if self.bz2file:
+ dec_data = self.bz2dec.decompress(data)
+ self.bz2file.write(dec_data)
return data
+ def _remove_gzip_header(self, data):
+ if data[:2] != '\037\213':
+ raise IOError, 'Not a gzipped file'
+ if ord(data[2]) != 8:
+ raise IOError, 'Unknown compression method'
+ flag = ord(data[3])
+ # modtime = self.fileobj.read(4)
+ # extraflag = self.fileobj.read(1)
+ # os = self.fileobj.read(1)
+
+ skip = 10
+ if flag & FEXTRA:
+ # Read & discard the extra field, if present
+ xlen = ord(data[10])
+ xlen = xlen + 256*ord(data[11])
+ skip = skip + 2 + xlen
+ if flag & FNAME:
+ # Read and discard a null-terminated string containing the filename
+ while True:
+ if not data[skip] or data[skip] == '\000':
+ break
+ skip += 1
+ skip += 1
+ if flag & FCOMMENT:
+ # Read and discard a null-terminated string containing a comment
+ while True:
+ if not data[skip] or data[skip] == '\000':
+ break
+ skip += 1
+ skip += 1
+ if flag & FHCRC:
+ skip += 2 # Read & discard the 16-bit header CRC
+ return data[skip:]
+
def close(self):
"""Clean everything up and return None to future reads."""
self.length = 0
class MirrorManager:
"""Manages all requests for mirror objects."""
def __init__(self, manager, cache_dir):
    """Remember the owning manager and set up an empty cache state.

    @param manager: the object notified through C{download_complete()}
        once a saved file has been verified
    @param cache_dir: path of the directory downloaded files are cached in
    """
    self.manager, self.cache_dir = manager, cache_dir
    # Per-site, per-base-directory caches, filled in on demand.
    self.apt_caches = {}
    self.cache = filepath.FilePath(cache_dir)
baseDir = base_match
log.msg("Settled on baseDir: %s" % baseDir)
- log.msg("Parsing '%s' gave '%s', '%s', '%s'" % (url, site, baseDir, path))
return site, baseDir, path
def init(self, site, baseDir):
site, baseDir, path = self.extractPath(url)
self.init(site, baseDir)
self.apt_caches[site][baseDir].file_updated(path, file_path)
-
+
def findHash(self, url):
    """Look up the hash for a URL in the cache of the site it belongs to.

    @param url: the URL of the file to look up
    @return: the result of the matching cache's C{findHash()}, or a
        Deferred that has already failed with L{MirrorError} when no cache
        exists for the URL's site and base directory
    """
    site, baseDir, path = self.extractPath(url)

    if site not in self.apt_caches or baseDir not in self.apt_caches[site]:
        # Unknown mirror: report it through an already-failed Deferred.
        failed = defer.Deferred()
        failed.errback(MirrorError("Site Not Found"))
        return failed

    return self.apt_caches[site][baseDir].findHash(path)
-
def save_file(self, response, hash, url):
    """Save a downloaded file to the cache and stream it.

    The response's stream is replaced by a L{ProxyFileStream} that writes
    the data to the cache (and hashes it) as it is read.  Files whose base
    name is in C{DECOMPRESS_FILES} and whose extension is in
    C{DECOMPRESS_EXTS} are additionally decompressed to a sibling file
    named without the extension.  Any previous copy of either file is
    removed first.

    @param response: the response whose stream is to be saved
    @param hash: the hash object to feed the file through
    @param url: the URI of the file being downloaded
    @return: the same response, with its stream wrapped
    """
    log.msg('Returning file: %s' % url)

    # The cache path mirrors the URL: <cache>/<host><path>
    parsed = urlparse(url)
    destFile = self.cache.preauthChild(parsed[1] + parsed[2])
    log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path))

    if destFile.exists():
        log.msg('File already exists, removing: %s' % destFile.path)
        destFile.remove()
    elif not destFile.parent().exists():
        # Only create the directory when it is missing: makedirs() raises
        # if it already exists, which is the normal case as soon as a
        # second file is saved next to an earlier one.
        destFile.parent().makedirs()

    root, ext = os.path.splitext(destFile.basename())
    if root.lower() in DECOMPRESS_FILES and ext.lower() in DECOMPRESS_EXTS:
        ext = ext.lower()
        decFile = destFile.sibling(root)
        log.msg('Decompressing to: %s' % decFile.path)
        if decFile.exists():
            log.msg('File already exists, removing: %s' % decFile.path)
            decFile.remove()
    else:
        ext = None
        decFile = None

    orig_stream = response.stream
    response.stream = ProxyFileStream(orig_stream, destFile, hash, ext, decFile)
    # NOTE(review): save_complete() passes this header value straight to
    # os.utime(), so getHeader() is assumed to return a timestamp -- confirm.
    response.stream.doneDefer.addCallback(self.save_complete, url, destFile,
                                          response.headers.getHeader('Last-Modified'),
                                          ext, decFile)
    response.stream.doneDefer.addErrback(self.save_error, url)
    return response
def save_complete(self, hash, url, destFile, modtime = None, ext = None, decFile = None):
    """Finish a saved download: set file times, check the hash, announce it.

    @param hash: the hash object the file was fed through; the outcome of
        its C{verify()} decides whether the file is announced
    @param url: the URI the file was downloaded from
    @param destFile: the file that was saved to the cache
    @param modtime: modification time to set on the saved file(s); nothing
        is changed when it is not given
    @param ext: the compressed extension removed to name C{decFile}, or
        None when the file was not decompressed
    @param decFile: the decompressed copy of the file, used only when
        C{ext} is set
    """
    if modtime:
        stamp = (modtime, modtime)
        os.utime(destFile.path, stamp)
        if ext:
            os.utime(decFile.path, stamp)

    verdict = hash.verify()

    # A false verdict other than None is a mismatch: log it and stop.
    if verdict is not None and not verdict:
        log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url))
        return

    # A true verdict is a confirmed match; None means the file was only hashed.
    if verdict:
        log.msg('Hashes match: %s' % url)
    else:
        log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))

    self.updatedFile(url, destFile.path)
    if ext:
        # The decompressed copy is registered under the URL without the extension.
        self.updatedFile(url[:-len(ext)], decFile.path)

    self.manager.download_complete(hash, url, destFile.path)
def save_error(self, failure, url):
"""An error has occurred in downloading or saving the file."""
log.msg('Error occurred downloading %s' % url)
self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
def verifyHash(self, found_hash, path, true_hash):
    """Fail the test unless the expected hash found for path is true_hash.

    @param found_hash: the hash object that was looked up for the file
    @param path: the path that was looked up, used in the failure message
    @param true_hash: the hex hash value the lookup should have produced
    """
    matches = found_hash.hexexpected() == true_hash
    complaint = "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash)
    self.failUnless(matches, complaint)
def test_findHash(self):
self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')