X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=apt_dht%2FMirrorManager.py;h=8bf197f9d40203749bb4c34ae33938d290e94c51;hb=21f93f967869efe321e2162b458aa22fd836da68;hp=b4995e9445a1a9ca175399430b4196b5582a1e1e;hpb=85d160514bda0e66058f0fb1ac70c8cf52b4435d;p=quix0rs-apt-p2p.git diff --git a/apt_dht/MirrorManager.py b/apt_dht/MirrorManager.py index b4995e9..8bf197f 100644 --- a/apt_dht/MirrorManager.py +++ b/apt_dht/MirrorManager.py @@ -1,4 +1,7 @@ +from bz2 import BZ2Decompressor +from zlib import decompressobj, MAX_WBITS +from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT from urlparse import urlparse import os @@ -12,39 +15,66 @@ from AptPackages import AptPackages aptpkg_dir='.apt-dht' +DECOMPRESS_EXTS = ['.gz', '.bz2'] +DECOMPRESS_FILES = ['release', 'sources', 'packages'] + class MirrorError(Exception): """Exception raised when there's a problem with the mirror.""" class ProxyFileStream(stream.SimpleStream): """Saves a stream to a file while providing a new stream.""" - def __init__(self, stream, outFile, modtime = None): + def __init__(self, stream, outFile, hash, decompress = None, decFile = None): """Initializes the proxy. @type stream: C{twisted.web2.stream.IByteStream} @param stream: the input stream to read from @type outFile: C{twisted.python.filepath.FilePath} @param outFile: the file to write to - @type modtime: C{int} - @param modtime: the modification time to set for the file + @type hash: L{Hash.HashObject} + @param hash: the hash object to use for the file + @type decompress: C{string} + @param decompress: also decompress the file as this type + (currently only '.gz' and '.bz2' are supported) + @type decFile: C{twisted.python.filepath.FilePath} + @param decFile: the file to write the decompressed data to """ self.stream = stream - self.outFile = outFile - self.openFile = outFile.open('w') - self.modtime = modtime + self.outFile = outFile.open('w') + self.hash = hash + self.hash.new() + self.gzfile = None + self.bz2file = None + if decompress == ".gz": + self.gzheader = True + self.gzfile = decFile.open('w') + self.gzdec = decompressobj(-MAX_WBITS) + elif decompress == ".bz2": + self.bz2file = decFile.open('w') + self.bz2dec = BZ2Decompressor() self.length = self.stream.length self.start = 0 + self.doneDefer = defer.Deferred() def _done(self): """Close the output file.""" - if not self.openFile.closed: - self.openFile.close() - if self.modtime: - os.utime(self.outFile.path, (self.modtime, self.modtime)) + if not self.outFile.closed: + self.outFile.close() + self.hash.digest() + if self.gzfile: + data_dec = self.gzdec.flush() + self.gzfile.write(data_dec) + self.gzfile.close() + self.gzfile = None + if self.bz2file: + self.bz2file.close() + self.bz2file = None + + self.doneDefer.callback(self.hash) def read(self): """Read some data from the stream.""" - if self.openFile.closed: + if self.outFile.closed: return None data = self.stream.read() @@ -61,9 +91,55 @@ class ProxyFileStream(stream.SimpleStream): self._done() return data - self.openFile.write(data) + self.outFile.write(data) + self.hash.update(data) + if self.gzfile: + if self.gzheader: + self.gzheader = False + new_data = self._remove_gzip_header(data) + dec_data = self.gzdec.decompress(new_data) + else: + dec_data = self.gzdec.decompress(data) + self.gzfile.write(dec_data) + if self.bz2file: + dec_data = self.bz2dec.decompress(data) + self.bz2file.write(dec_data) return data + def _remove_gzip_header(self, data): + if data[:2] != '\037\213': + raise IOError, 'Not a gzipped file' + if ord(data[2]) != 8: + raise IOError, 'Unknown compression method' + flag = ord(data[3]) + # modtime = self.fileobj.read(4) + # extraflag = self.fileobj.read(1) + # os = self.fileobj.read(1) + + skip = 10 + if flag & FEXTRA: + # Read & discard the extra field, if present + xlen = ord(data[10]) + xlen = xlen + 256*ord(data[11]) + skip = skip + 2 + xlen + if flag & FNAME: + # Read and discard a null-terminated string containing the filename + while True: + if not data[skip] or data[skip] == '\000': + break + skip += 1 + skip += 1 + if flag & FCOMMENT: + # Read and discard a null-terminated string containing a comment + while True: + if not data[skip] or data[skip] == '\000': + break + skip += 1 + skip += 1 + if flag & FHCRC: + skip += 2 # Read & discard the 16-bit header CRC + return data[skip:] + def close(self): """Clean everything up and return None to future reads.""" self.length = 0 @@ -73,7 +149,8 @@ class ProxyFileStream(stream.SimpleStream): class MirrorManager: """Manages all requests for mirror objects.""" - def __init__(self, cache_dir): + def __init__(self, cache_dir, manager = None): + self.manager = manager self.cache_dir = cache_dir self.cache = filepath.FilePath(self.cache_dir) self.apt_caches = {} @@ -120,7 +197,7 @@ class MirrorManager: site, baseDir, path = self.extractPath(url) self.init(site, baseDir) self.apt_caches[site][baseDir].file_updated(path, file_path) - + def findHash(self, url): site, baseDir, path = self.extractPath(url) if site in self.apt_caches and baseDir in self.apt_caches[site]: @@ -128,9 +205,13 @@ class MirrorManager: d = defer.Deferred() d.errback(MirrorError("Site Not Found")) return d - - def save_file(self, response, hash, size, url): + + def save_file(self, response, hash, url): """Save a downloaded file to the cache and stream it.""" + if response.code != 200: + log.msg('File was not found (%r): %s' % (response, url)) + return response + log.msg('Returning file: %s' % url) parsed = urlparse(url) @@ -142,11 +223,50 @@ class MirrorManager: destFile.remove() else: destFile.parent().makedirs() - + + root, ext = os.path.splitext(destFile.basename()) + if root.lower() in DECOMPRESS_FILES and ext.lower() in DECOMPRESS_EXTS: + ext = ext.lower() + decFile = destFile.sibling(root) + log.msg('Decompressing to: %s' % decFile.path) + if decFile.exists(): + log.msg('File already exists, removing: %s' % decFile.path) + decFile.remove() + else: + ext = None + decFile = None + orig_stream = response.stream - response.stream = ProxyFileStream(orig_stream, destFile, response.headers.getHeader('Last-Modified')) + response.stream = ProxyFileStream(orig_stream, destFile, hash, ext, decFile) + response.stream.doneDefer.addCallback(self.save_complete, url, destFile, + response.headers.getHeader('Last-Modified'), + ext, decFile) + response.stream.doneDefer.addErrback(self.save_error, url) return response + def save_complete(self, hash, url, destFile, modtime = None, ext = None, decFile = None): + """Update the modification time and AptPackages.""" + if modtime: + os.utime(destFile.path, (modtime, modtime)) + if ext: + os.utime(decFile.path, (modtime, modtime)) + + result = hash.verify() + if result or result is None: + if result: + log.msg('Hashes match: %s' % url) + else: + log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url)) + + self.updatedFile(url, destFile.path) + if ext: + self.updatedFile(url[:-len(ext)], decFile.path) + + if self.manager: + self.manager.download_complete(hash, url, destFile.path) + else: + log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url)) + def save_error(self, failure, url): """An error has occurred in downloadign or saving the file.""" log.msg('Error occurred downloading %s' % url) @@ -180,8 +300,8 @@ class TestMirrorManager(unittest.TestCase): self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path) def verifyHash(self, found_hash, path, true_hash): - self.failUnless(found_hash[0] == true_hash, - "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash)) + self.failUnless(found_hash.hexexpected() == true_hash, + "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash)) def test_findHash(self): self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')