Unload the AptPackages caches after a period of inactivity.
[quix0rs-apt-p2p.git] / apt_dht / MirrorManager.py
index d56e16da16520f259fb1d12b8f44a7f6bac2b537..738fdebc4d7e668d6dee6e42ee7ce2860c68ceb7 100644 (file)
 
-from bz2 import BZ2Decompressor
-from zlib import decompressobj, MAX_WBITS
-from gzip import FCOMMENT, FEXTRA, FHCRC, FNAME, FTEXT
 from urlparse import urlparse
-from binascii import a2b_hex, b2a_hex
-import os, sha, md5
+import os
 
-from twisted.python import log, filepath
+from twisted.python import log
+from twisted.python.filepath import FilePath
 from twisted.internet import defer
 from twisted.trial import unittest
-from twisted.web2 import stream
 from twisted.web2.http import splitHostPort
 
 from AptPackages import AptPackages
 
-aptpkg_dir='.apt-dht'
-
-DECOMPRESS_EXTS = ['.gz', '.bz2']
-DECOMPRESS_FILES = ['release', 'sources', 'packages']
+aptpkg_dir='apt-packages'
 
 class MirrorError(Exception):
     """Exception raised when there's a problem with the mirror."""
 
-class ProxyFileStream(stream.SimpleStream):
-    """Saves a stream to a file while providing a new stream."""
-    
-    def __init__(self, stream, outFile, hashType = "sha1", decompress = None, decFile = None):
-        """Initializes the proxy.
-        
-        @type stream: C{twisted.web2.stream.IByteStream}
-        @param stream: the input stream to read from
-        @type outFile: C{twisted.python.filepath.FilePath}
-        @param outFile: the file to write to
-        @type hashType: C{string}
-        @param hashType: also hash the file using this hashing function
-            (currently only 'sha1' and 'md5' are supported)
-        @type decompress: C{string}
-        @param decompress: also decompress the file as this type
-            (currently only '.gz' and '.bz2' are supported)
-        @type decFile: C{twisted.python.filepath.FilePath}
-        @param decFile: the file to write the decompressed data to
-        """
-        self.stream = stream
-        self.outFile = outFile.open('w')
-        self.hasher = None
-        if hashType == "sha1":
-            self.hasher = sha.new()
-        elif hashType == "md5":
-            self.hasher = md5.new()
-        self.gzfile = None
-        self.bz2file = None
-        if decompress == ".gz":
-            self.gzheader = True
-            self.gzfile = decFile.open('w')
-            self.gzdec = decompressobj(-MAX_WBITS)
-        elif decompress == ".bz2":
-            self.bz2file = decFile.open('w')
-            self.bz2dec = BZ2Decompressor()
-        self.length = self.stream.length
-        self.start = 0
-        self.doneDefer = defer.Deferred()
-
-    def _done(self):
-        """Close the output file."""
-        if not self.outFile.closed:
-            self.outFile.close()
-            fileHash = None
-            if self.hasher:
-                fileHash = self.hasher.digest()
-            if self.gzfile:
-                data_dec = self.gzdec.flush()
-                self.gzfile.write(data_dec)
-                self.gzfile.close()
-                self.gzfile = None
-            if self.bz2file:
-                self.bz2file.close()
-                self.bz2file = None
-                
-            self.doneDefer.callback(fileHash)
-    
-    def read(self):
-        """Read some data from the stream."""
-        if self.outFile.closed:
-            return None
-        
-        data = self.stream.read()
-        if isinstance(data, defer.Deferred):
-            data.addCallbacks(self._write, self._done)
-            return data
-        
-        self._write(data)
-        return data
-    
-    def _write(self, data):
-        """Write the stream data to the file and return it for others to use."""
-        if data is None:
-            self._done()
-            return data
-        
-        self.outFile.write(data)
-        if self.hasher:
-            self.hasher.update(data)
-        if self.gzfile:
-            if self.gzheader:
-                self.gzheader = False
-                new_data = self._remove_gzip_header(data)
-                dec_data = self.gzdec.decompress(new_data)
-            else:
-                dec_data = self.gzdec.decompress(data)
-            self.gzfile.write(dec_data)
-        if self.bz2file:
-            dec_data = self.bz2dec.decompress(data)
-            self.bz2file.write(dec_data)
-        return data
-    
-    def _remove_gzip_header(self, data):
-        if data[:2] != '\037\213':
-            raise IOError, 'Not a gzipped file'
-        if ord(data[2]) != 8:
-            raise IOError, 'Unknown compression method'
-        flag = ord(data[3])
-        # modtime = self.fileobj.read(4)
-        # extraflag = self.fileobj.read(1)
-        # os = self.fileobj.read(1)
-
-        skip = 10
-        if flag & FEXTRA:
-            # Read & discard the extra field, if present
-            xlen = ord(data[10])
-            xlen = xlen + 256*ord(data[11])
-            skip = skip + 2 + xlen
-        if flag & FNAME:
-            # Read and discard a null-terminated string containing the filename
-            while True:
-                if not data[skip] or data[skip] == '\000':
-                    break
-                skip += 1
-            skip += 1
-        if flag & FCOMMENT:
-            # Read and discard a null-terminated string containing a comment
-            while True:
-                if not data[skip] or data[skip] == '\000':
-                    break
-                skip += 1
-            skip += 1
-        if flag & FHCRC:
-            skip += 2     # Read & discard the 16-bit header CRC
-        return data[skip:]
-
-    def close(self):
-        """Clean everything up and return None to future reads."""
-        self.length = 0
-        self._done()
-        self.stream.close()
-
 class MirrorManager:
     """Manages all requests for mirror objects."""
     
-    def __init__(self, cache_dir):
+    def __init__(self, cache_dir, unload_delay):
         self.cache_dir = cache_dir
-        self.cache = filepath.FilePath(self.cache_dir)
+        self.unload_delay = unload_delay
         self.apt_caches = {}
     
     def extractPath(self, url):
@@ -197,8 +58,9 @@ class MirrorManager:
             self.apt_caches[site] = {}
             
         if baseDir not in self.apt_caches[site]:
-            site_cache = os.path.join(self.cache_dir, aptpkg_dir, 'mirrors', site + baseDir.replace('/', '_'))
-            self.apt_caches[site][baseDir] = AptPackages(site_cache)
+            site_cache = self.cache_dir.child(aptpkg_dir).child('mirrors').child(site + baseDir.replace('/', '_'))
+            site_cache.makedirs
+            self.apt_caches[site][baseDir] = AptPackages(site_cache, self.unload_delay)
     
     def updatedFile(self, url, file_path):
         site, baseDir, path = self.extractPath(url)
@@ -208,83 +70,11 @@ class MirrorManager:
     def findHash(self, url):
         site, baseDir, path = self.extractPath(url)
         if site in self.apt_caches and baseDir in self.apt_caches[site]:
-            d = self.apt_caches[site][baseDir].findHash(path)
-            d.addCallback(self.translateHash)
-            return d
+            return self.apt_caches[site][baseDir].findHash(path)
         d = defer.Deferred()
         d.errback(MirrorError("Site Not Found"))
         return d
     
-    def translateHash(self, (hash, size)):
-        """Translate a hash from apt's hex encoding to a string."""
-        if hash:
-            hash = a2b_hex(hash)
-        return (hash, size)
-
-    def save_file(self, response, hash, size, url):
-        """Save a downloaded file to the cache and stream it."""
-        log.msg('Returning file: %s' % url)
-        
-        parsed = urlparse(url)
-        destFile = self.cache.preauthChild(parsed[1] + parsed[2])
-        log.msg('Saving returned %r byte file to cache: %s' % (response.stream.length, destFile.path))
-        
-        if destFile.exists():
-            log.msg('File already exists, removing: %s' % destFile.path)
-            destFile.remove()
-        else:
-            destFile.parent().makedirs()
-            
-        root, ext = os.path.splitext(destFile.basename())
-        if root.lower() in DECOMPRESS_FILES and ext.lower() in DECOMPRESS_EXTS:
-            ext = ext.lower()
-            decFile = destFile.sibling(root)
-            log.msg('Decompressing to: %s' % decFile.path)
-            if decFile.exists():
-                log.msg('File already exists, removing: %s' % decFile.path)
-                decFile.remove()
-        else:
-            ext = None
-            decFile = None
-            
-        if hash and len(hash) == 16:
-            hashType = "md5"
-        else:
-            hashType = "sha1"
-        
-        orig_stream = response.stream
-        response.stream = ProxyFileStream(orig_stream, destFile, hashType, ext, decFile)
-        response.stream.doneDefer.addCallback(self.save_complete, hash, size, url, destFile,
-                                              response.headers.getHeader('Last-Modified'),
-                                              ext, decFile)
-        response.stream.doneDefer.addErrback(self.save_error, url)
-        return response
-
-    def save_complete(self, result, hash, size, url, destFile, modtime = None, ext = None, decFile = None):
-        """Update the modification time and AptPackages."""
-        if modtime:
-            os.utime(destFile.path, (modtime, modtime))
-            if ext:
-                os.utime(decFile.path, (modtime, modtime))
-        
-        if not hash or result == hash:
-            if hash:
-                log.msg('Hashes match: %s' % url)
-            else:
-                log.msg('Hashed file to %s: %s' % (b2a_hex(result), url))
-                
-            self.updatedFile(url, destFile.path)
-            if ext:
-                self.updatedFile(url[:-len(ext)], decFile.path)
-        else:
-            log.msg("Hashes don't match %s != %s: %s" % (b2a_hex(hash), b2a_hex(result), url))
-
-    def save_error(self, failure, url):
-        """An error has occurred in downloadign or saving the file."""
-        log.msg('Error occurred downloading %s' % url)
-        log.err(failure)
-        return failure
-
 class TestMirrorManager(unittest.TestCase):
     """Unit tests for the mirror manager."""
     
@@ -293,7 +83,7 @@ class TestMirrorManager(unittest.TestCase):
     client = None
     
     def setUp(self):
-        self.client = MirrorManager('/tmp')
+        self.client = MirrorManager(FilePath('/tmp/.apt-dht'), 300)
         
     def test_extractPath(self):
         site, baseDir, path = self.client.extractPath('http://ftp.us.debian.org/debian/dists/unstable/Release')
@@ -312,8 +102,8 @@ class TestMirrorManager(unittest.TestCase):
         self.failUnless(path == "/dists/unstable/Release", "no match: %s" % path)
 
     def verifyHash(self, found_hash, path, true_hash):
-        self.failUnless(found_hash[0] == true_hash, 
-                    "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash))
+        self.failUnless(found_hash.hexexpected() == true_hash, 
+                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
 
     def test_findHash(self):
         self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
@@ -324,13 +114,13 @@ class TestMirrorManager(unittest.TestCase):
                 break
         
         self.client.updatedFile('http://' + self.releaseFile.replace('_','/'), 
-                                '/var/lib/apt/lists/' + self.releaseFile)
+                                FilePath('/var/lib/apt/lists/' + self.releaseFile))
         self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                 self.packagesFile[self.packagesFile.find('_dists_')+1:].replace('_','/'), 
-                                '/var/lib/apt/lists/' + self.packagesFile)
+                                FilePath('/var/lib/apt/lists/' + self.packagesFile))
         self.client.updatedFile('http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') +
                                 self.sourcesFile[self.sourcesFile.find('_dists_')+1:].replace('_','/'), 
-                                '/var/lib/apt/lists/' + self.sourcesFile)
+                                FilePath('/var/lib/apt/lists/' + self.sourcesFile))
 
         lastDefer = defer.Deferred()
         
@@ -341,7 +131,7 @@ class TestMirrorManager(unittest.TestCase):
         idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/binary-i386/Packages.bz2'
 
         d = self.client.findHash(idx_path)
-        d.addCallback(self.verifyHash, idx_path, a2b_hex(idx_hash))
+        d.addCallback(self.verifyHash, idx_path, idx_hash)
 
         pkg_hash = os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                             '/var/lib/apt/lists/' + self.packagesFile + 
@@ -354,7 +144,7 @@ class TestMirrorManager(unittest.TestCase):
                             ' | cut -d\  -f 2').read().rstrip('\n')
 
         d = self.client.findHash(pkg_path)
-        d.addCallback(self.verifyHash, pkg_path, a2b_hex(pkg_hash))
+        d.addCallback(self.verifyHash, pkg_path, pkg_hash)
 
         src_dir = os.popen('grep -A 30 -E "^Package: dpkg$" ' + 
                             '/var/lib/apt/lists/' + self.sourcesFile + 
@@ -372,7 +162,7 @@ class TestMirrorManager(unittest.TestCase):
         for i in range(len(src_hashes)):
             src_path = 'http://' + self.releaseFile[:self.releaseFile.find('_dists_')+1].replace('_','/') + src_dir + '/' + src_paths[i]
             d = self.client.findHash(src_path)
-            d.addCallback(self.verifyHash, src_path, a2b_hex(src_hashes[i]))
+            d.addCallback(self.verifyHash, src_path, src_hashes[i])
             
         idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + 
                             '/var/lib/apt/lists/' + self.releaseFile + 
@@ -381,7 +171,7 @@ class TestMirrorManager(unittest.TestCase):
         idx_path = 'http://' + self.releaseFile.replace('_','/')[:-7] + 'main/source/Sources.bz2'
 
         d = self.client.findHash(idx_path)
-        d.addCallback(self.verifyHash, idx_path, a2b_hex(idx_hash))
+        d.addCallback(self.verifyHash, idx_path, idx_hash)
 
         d.addBoth(lastDefer.callback)
         return lastDefer