Upgrade pysqlite from 1.0 to 2.x (now uses sqlite3).
[quix0rs-apt-p2p.git] / apt_dht / MirrorManager.py
index e39e99b3f408bdde057d70eeeafc2593798658b8..c89a5915bc6622fdddd9fc4394656aa77732420f 100644 (file)
@@ -1,5 +1,9 @@
 
+from bz2 import BZ2Decompressor
+from zlib import decompressobj, MAX_WBITS
+from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT
 from urlparse import urlparse
+from binascii import a2b_hex
 import os
 
 from twisted.python import log, filepath
@@ -12,28 +16,57 @@ from AptPackages import AptPackages
 
 aptpkg_dir='.apt-dht'
 
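+# Index files with these base names and extensions also get saved decompressed.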
+DECOMPRESS_EXTS = ['.gz', '.bz2']
+DECOMPRESS_FILES = ['release', 'sources', 'packages']
+
 class MirrorError(Exception):
     """Exception raised when there's a problem with the mirror."""
 
 class ProxyFileStream(stream.SimpleStream):
     """Saves a stream to a file while providing a new stream."""
     
-    def __init__(self, stream, outFile):
+    def __init__(self, stream, outFile, decompress=None, decFile=None):
         """Initializes the proxy.
         
         @type stream: C{twisted.web2.stream.IByteStream}
         @param stream: the input stream to read from
         @type outFile: C{twisted.python.filepath.FilePath}
         @param outFile: the file to write to
+        @type decompress: C{string}
+        @param decompress: also decompress the file as this type
+            (currently only '.gz' and '.bz2' are supported)
+        @type decFile: C{twisted.python.filepath.FilePath}
+        @param decFile: the file to write the decompressed data to
         """
         self.stream = stream
         self.outFile = outFile.open('w')
+        self.gzfile = None
+        self.bz2file = None
+        if decompress == ".gz":
+            self.gzheader = True
+            self.gzfile = decFile.open('w')
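+            # A negative wbits value tells zlib to expect a raw deflate stream
+            # with no gzip header or trailer; the header is stripped by hand
+            # in _remove_gzip_header() on the first chunk read.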
+            self.gzdec = decompressobj(-MAX_WBITS)
+        elif decompress == ".bz2":
+            self.bz2file = decFile.open('w')
+            self.bz2dec = BZ2Decompressor()
         self.length = self.stream.length
         self.start = 0
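+        # Fired once the input stream is exhausted and all output files are closed.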
+        self.doneDefer = defer.Deferred()
 
     def _done(self):
         """Close the output file."""
-        self.outFile.close()
+        if not self.outFile.closed:
+            self.outFile.close()
+            if self.gzfile:
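+                # Flush any data zlib is still holding in its internal buffer.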
+                data_dec = self.gzdec.flush()
+                self.gzfile.write(data_dec)
+                self.gzfile.close()
+                self.gzfile = None
+            if self.bz2file:
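+                # BZ2Decompressor has no flush(); everything decoded so far
+                # was already written out in read().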
+                self.bz2file.close()
+                self.bz2file = None
+
+            self.doneDefer.callback(1)
     
     def read(self):
         """Read some data from the stream."""
@@ -55,8 +88,53 @@ class ProxyFileStream(stream.SimpleStream):
             return data
         
         self.outFile.write(data)
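+        # The raw deflate decompressor can't parse the gzip header, so it is
+        # stripped from the first chunk of a .gz stream.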
+        if self.gzfile:
+            if self.gzheader:
+                self.gzheader = False
+                new_data = self._remove_gzip_header(data)
+                dec_data = self.gzdec.decompress(new_data)
+            else:
+                dec_data = self.gzdec.decompress(data)
+            self.gzfile.write(dec_data)
+        if self.bz2file:
+            dec_data = self.bz2dec.decompress(data)
+            self.bz2file.write(dec_data)
         return data
     
+    def _remove_gzip_header(self, data):
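+        """Strip the gzip header from the first chunk of data.
+
+        Assumes the entire header arrives in the first chunk read from
+        the stream.
+        """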
+        if data[:2] != '\037\213':
+            raise IOError, 'Not a gzipped file'
+        if ord(data[2]) != 8:
+            raise IOError, 'Unknown compression method'
+        flag = ord(data[3])
+        # The fixed part of the gzip header is 10 bytes: magic (2), compression
+        # method (1), flags (1), modification time (4), extra flags (1), OS (1).
+
+        skip = 10
+        if flag & FEXTRA:
+            # Read & discard the extra field, if present
+            xlen = ord(data[10])
+            xlen = xlen + 256*ord(data[11])
+            skip = skip + 2 + xlen
+        if flag & FNAME:
+            # Read and discard a null-terminated string containing the filename
+            while True:
+                if not data[skip] or data[skip] == '\000':
+                    break
+                skip += 1
+            skip += 1
+        if flag & FCOMMENT:
+            # Read and discard a null-terminated string containing a comment
+            while True:
+                if not data[skip] or data[skip] == '\000':
+                    break
+                skip += 1
+            skip += 1
+        if flag & FHCRC:
+            skip += 2     # Read & discard the 16-bit header CRC
+        return data[skip:]
+
     def close(self):
         """Clean everything up and return None to future reads."""
         self.length = 0
@@ -99,7 +177,6 @@ class MirrorManager:
                         baseDir = base_match
             log.msg("Settled on baseDir: %s" % baseDir)
         
-        log.msg("Parsing '%s' gave '%s', '%s', '%s'" % (url, site, baseDir, path))
         return site, baseDir, path
         
     def init(self, site, baseDir):
@@ -114,15 +191,22 @@ class MirrorManager:
         site, baseDir, path = self.extractPath(url)
         self.init(site, baseDir)
         self.apt_caches[site][baseDir].file_updated(path, file_path)
-    
+
     def findHash(self, url):
-        log.msg('Trying to find hash for %s' % url)
         site, baseDir, path = self.extractPath(url)
         if site in self.apt_caches and baseDir in self.apt_caches[site]:
-            return self.apt_caches[site][baseDir].findHash(path)
+            d = self.apt_caches[site][baseDir].findHash(path)
+            d.addCallback(self.translateHash)
+            return d
         d = defer.Deferred()
         d.errback(MirrorError("Site Not Found"))
         return d
+
+    def translateHash(self, (hash, size)):
+        """Translate a hash from apt's hex encoding to a string."""
+        if hash:
+            hash = a2b_hex(hash)
+        return (hash, size)
 
     def save_file(self, response, hash, size, url):
         """Save a downloaded file to the cache and stream it."""
@@ -130,20 +214,45 @@ class MirrorManager:
         
         parsed = urlparse(url)
         destFile = self.cache.preauthChild(parsed[1] + parsed[2])
-        log.msg('Cache file: %s' % destFile.path)
+        log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path))
         
         if destFile.exists():
-            log.err('File already exists: %s', destFile.path)
-            d.callback(response)
-            return
-        
-        destFile.parent().makedirs()
-        log.msg('Saving returned %i byte file to: %s' % (response.stream.length, destFile.path))
+            log.msg('File already exists, removing: %s' % destFile.path)
+            destFile.remove()
+        else:
+            destFile.parent().makedirs()
+
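+        # Only the known apt index files (Release, Packages, Sources) in a
+        # compressed form are also saved decompressed.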
+        root, ext = os.path.splitext(destFile.basename())
+        if root.lower() in DECOMPRESS_FILES and ext.lower() in DECOMPRESS_EXTS:
+            ext = ext.lower()
+            decFile = destFile.sibling(root)
+            log.msg('Decompressing to: %s' % decFile.path)
+            if decFile.exists():
+                log.msg('File already exists, removing: %s' % decFile.path)
+                decFile.remove()
+        else:
+            ext = None
+            decFile = None
         
         orig_stream = response.stream
-        response.stream = ProxyFileStream(orig_stream, destFile)
+        response.stream = ProxyFileStream(orig_stream, destFile, ext, decFile)
+        response.stream.doneDefer.addCallback(self.save_complete, url, destFile,
+                                              response.headers.getHeader('Last-Modified'),
+                                              ext, decFile)
+        response.stream.doneDefer.addErrback(self.save_error, url)
         return response
 
+    def save_complete(self, result, url, destFile, modtime=None, ext=None, decFile=None):
+        """Update the modification time and AptPackages."""
+        if modtime:
+            os.utime(destFile.path, (modtime, modtime))
+            if ext:
+                os.utime(decFile.path, (modtime, modtime))
+
+        self.updatedFile(url, destFile.path)
+        if ext:
+            self.updatedFile(url[:-len(ext)], decFile.path)
+
     def save_error(self, failure, url):
         """An error has occurred in downloadign or saving the file."""
         log.msg('Error occurred downloading %s' % url)
@@ -206,7 +315,7 @@ class TestMirrorManager(unittest.TestCase):
         idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
 
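+        # Hashes in apt's lists are hex-encoded; findHash now returns raw bytes.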
         d = self.client.findHash(idx_path)
-        d.addCallback(self.verifyHash, idx_path, idx_hash)
+        d.addCallback(self.verifyHash, idx_path, a2b_hex(idx_hash))
 
         pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                             '/var/lib/apt/lists/' + self.packagesFile + 
@@ -219,7 +328,7 @@ class TestMirrorManager(unittest.TestCase):
                             ' | cut -d\  -f 2').read().rstrip('\n')
 
         d = self.client.findHash(pkg_path)
-        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
+        d.addCallback(self.verifyHash, pkg_path, a2b_hex(pkg_hash))
 
         src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                             '/var/lib/apt/lists/' + self.sourcesFile + 
@@ -237,7 +346,7 @@ class TestMirrorManager(unittest.TestCase):
         for i in range(len(src_hashes)):
             src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
             d = self.client.findHash(src_path)
-            d.addCallback(self.verifyHash, src_path, src_hashes[i])
+            d.addCallback(self.verifyHash, src_path, a2b_hex(src_hashes[i]))
             
         idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + 
                             '/var/lib/apt/lists/' + self.releaseFile + 
@@ -246,7 +355,7 @@ class TestMirrorManager(unittest.TestCase):
         idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'
 
         d = self.client.findHash(idx_path)
-        d.addCallback(self.verifyHash, idx_path, idx_hash)
+        d.addCallback(self.verifyHash, idx_path, a2b_hex(idx_hash))
 
         d.addBoth(lastDefer.callback)
         return lastDefer