Use python-debian for parsing RFC 822 files (untested).
[quix0rs-apt-p2p.git] / apt_dht / AptPackages.py
index a605b2ff58c0993cb7b872a76bae3d73c611072b..1c38a003c4776ded668544d94a88167b110cea87 100644 (file)
@@ -10,13 +10,19 @@ from UserDict import DictMixin
 
 from twisted.internet import threads, defer
 from twisted.python import log
+from twisted.python.filepath import FilePath
 from twisted.trial import unittest
 
 import apt_pkg, apt_inst
 from apt import OpProgress
+from debian_bundle import deb822
+
+from Hash import HashObject
 
 apt_pkg.init()
 
+TRACKED_FILES = ['release', 'sources', 'packages']
+
 class PackageFileList(DictMixin):
     """Manages a list of package files belonging to a backend.
     
@@ -26,15 +32,16 @@ class PackageFileList(DictMixin):
     
     def __init__(self, cache_dir):
         self.cache_dir = cache_dir
-        if not os.path.exists(self.cache_dir):
-            os.makedirs(self.cache_dir)
+        self.cache_dir.restat(False)
+        if not self.cache_dir.exists():
+            self.cache_dir.makedirs()
         self.packages = None
         self.open()
 
     def open(self):
         """Open the persistent dictionary of files in this backend."""
         if self.packages is None:
-            self.packages = shelve.open(self.cache_dir+'/packages.db')
+            self.packages = shelve.open(self.cache_dir.child('packages.db').path)
 
     def close(self):
         """Close the persistent dictionary."""
@@ -48,7 +55,7 @@ class PackageFileList(DictMixin):
         fake lists and sources.list.
         """
         filename = cache_path.split('/')[-1]
-        if filename=="Packages" or filename=="Release" or filename=="Sources":
+        if filename.lower() in TRACKED_FILES:
             log.msg("Registering package file: "+cache_path)
             self.packages[cache_path] = file_path
             return True
@@ -58,7 +65,8 @@ class PackageFileList(DictMixin):
         """Check all files in the database to make sure they exist."""
         files = self.packages.keys()
         for f in files:
-            if not os.path.exists(self.packages[f]):
+            self.packages[f].restat(False)
+            if not self.packages[f].exists():
                 log.msg("File in packages database has been deleted: "+f)
                 del self.packages[f]
 
@@ -120,19 +128,16 @@ class AptPackages:
         self.apt_config = deepcopy(self.DEFAULT_APT_CONFIG)
 
         for dir in self.essential_dirs:
-            path = os.path.join(self.cache_dir, dir)
-            if not os.path.exists(path):
-                os.makedirs(path)
+            path = self.cache_dir.preauthChild(dir)
+            if not path.exists():
+                path.makedirs()
         for file in self.essential_files:
-            path = os.path.join(self.cache_dir, file)
-            if not os.path.exists(path):
-                f = open(path,'w')
-                f.close()
-                del f
+            path = self.cache_dir.preauthChild(file)
+            if not path.exists():
+                path.touch()
                 
-        self.apt_config['Dir'] = self.cache_dir
-        self.apt_config['Dir::State::status'] = os.path.join(self.cache_dir, 
-                      self.apt_config['Dir::State'], self.apt_config['Dir::State::status'])
+        self.apt_config['Dir'] = self.cache_dir.path
+        self.apt_config['Dir::State::status'] = self.cache_dir.preauthChild(self.apt_config['Dir::State']).preauthChild(self.apt_config['Dir::State::status']).path
         self.packages = PackageFileList(cache_dir)
         self.loaded = 0
         self.loading = None
@@ -148,31 +153,13 @@ class AptPackages:
         self.indexrecords[cache_path] = {}
 
         read_packages = False
-        f = open(file_path, 'r')
+        f = file_path.open('r')
         
-        for line in f:
-            line = line.rstrip()
-    
-            if line[:1] != " ":
-                read_packages = False
-                try:
-                    # Read the various headers from the file
-                    h, v = line.split(":", 1)
-                    if h == "MD5Sum" or h == "SHA1" or h == "SHA256":
-                        read_packages = True
-                        hash_type = h
-                except:
-                    # Bad header line, just ignore it
-                    log.msg("WARNING: Ignoring badly formatted Release line: %s" % line)
-    
-                # Skip to the next line
-                continue
+        rel = deb822.Release(f, fields = ['MD5Sum', 'SHA1', 'SHA256'])
+        for hash_type in rel:
+            for file in rel[hash_type]:  # entry keys: lower-cased hash field name, 'size', 'name'
+                self.indexrecords[cache_path].setdefault(file['name'], {})[hash_type.upper()] = (file[hash_type.lower()], file['size'])
             
-            # Read file names from the multiple hash sections of the file
-            if read_packages:
-                p = line.split()
-                self.indexrecords[cache_path].setdefault(p[2], {})[hash_type] = (p[0], p[1])
-        
         f.close()
 
     def file_updated(self, cache_path, file_path):
@@ -200,54 +187,58 @@ class AptPackages:
         """Regenerates the fake configuration and load the packages cache."""
         if self.loaded: return True
         apt_pkg.InitSystem()
-        rmtree(os.path.join(self.cache_dir, self.apt_config['Dir::State'], 
-                            self.apt_config['Dir::State::Lists']))
-        os.makedirs(os.path.join(self.cache_dir, self.apt_config['Dir::State'], 
-                                 self.apt_config['Dir::State::Lists'], 'partial'))
-        sources_filename = os.path.join(self.cache_dir, self.apt_config['Dir::Etc'], 
-                                        self.apt_config['Dir::Etc::sourcelist'])
-        sources = open(sources_filename, 'w')
+        self.cache_dir.preauthChild(self.apt_config['Dir::State']
+                     ).preauthChild(self.apt_config['Dir::State::Lists']).remove()
+        self.cache_dir.preauthChild(self.apt_config['Dir::State']
+                     ).preauthChild(self.apt_config['Dir::State::Lists']
+                     ).child('partial').makedirs()
+        sources_file = self.cache_dir.preauthChild(self.apt_config['Dir::Etc']
+                               ).preauthChild(self.apt_config['Dir::Etc::sourcelist'])
+        sources = sources_file.open('w')
         sources_count = 0
+        deb_src_added = False
         self.packages.check_files()
         self.indexrecords = {}
         for f in self.packages:
             # we should probably clear old entries from self.packages and
             # take into account the recorded mtime as optimization
-            filepath = self.packages[f]
+            file = self.packages[f]
             if f.split('/')[-1] == "Release":
-                self.addRelease(f, filepath)
+                self.addRelease(f, file)
             fake_uri='http://apt-dht'+f
             fake_dirname = '/'.join(fake_uri.split('/')[:-1])
             if f.endswith('Sources'):
+                deb_src_added = True
                 source_line='deb-src '+fake_dirname+'/ /'
             else:
                 source_line='deb '+fake_dirname+'/ /'
-            listpath=(os.path.join(self.cache_dir, self.apt_config['Dir::State'], 
-                                   self.apt_config['Dir::State::Lists'], 
-                                   apt_pkg.URItoFileName(fake_uri)))
+            listpath = self.cache_dir.preauthChild(self.apt_config['Dir::State']
+                                    ).preauthChild(self.apt_config['Dir::State::Lists']
+                                    ).child(apt_pkg.URItoFileName(fake_uri))
             sources.write(source_line+'\n')
             log.msg("Sources line: " + source_line)
             sources_count = sources_count + 1
 
-            try:
+            if listpath.exists():
                 #we should empty the directory instead
-                os.unlink(listpath)
-            except:
-                pass
-            os.symlink(filepath, listpath)
+                listpath.remove()
+            os.symlink(file.path, listpath.path)
         sources.close()
 
         if sources_count == 0:
-            log.msg("No Packages files available for %s backend"%(self.cache_dir))
+            log.msg("No Packages files available for %s backend"%(self.cache_dir.path))
             return False
 
-        log.msg("Loading Packages database for "+self.cache_dir)
+        log.msg("Loading Packages database for "+self.cache_dir.path)
         for key, value in self.apt_config.items():
             apt_pkg.Config[key] = value
 
         self.cache = apt_pkg.GetCache(OpProgress())
         self.records = apt_pkg.GetPkgRecords(self.cache)
-        self.srcrecords = apt_pkg.GetPkgSrcRecords()
+        if deb_src_added:
+            self.srcrecords = apt_pkg.GetPkgSrcRecords()
+        else:
+            self.srcrecords = None
 
         self.loaded = 1
         return True
@@ -275,9 +266,16 @@ class AptPackages:
 
         deferLoad = self.load()
         deferLoad.addCallback(self._findHash, path, d)
+        deferLoad.addErrback(self._findHash_error, path, d)
         
         return d
 
+    def _findHash_error(self, failure, path, d):
+        """Errback for findHash: log the failure for path, then fire d with a default HashObject()."""
+        log.msg('An error occurred while looking up a hash for: %s' % path)
+        log.err(failure)
+        d.callback(HashObject())
+
     def _findHash(self, loadResult, path, d):
         """Really find the hash for a path.
         
@@ -285,7 +283,7 @@ class AptPackages:
         function are pending.
         """
         if not loadResult:
-            d.callback((None, None))
+            d.callback(HashObject())
             return loadResult
         
         # First look for the path in the cache of index files
@@ -293,7 +291,9 @@ class AptPackages:
             if path.startswith(release[:-7]):
                 for indexFile in self.indexrecords[release]:
                     if release[:-7] + indexFile == path:
-                        d.callback(self.indexrecords[release][indexFile]['SHA1'])
+                        h = HashObject()
+                        h.setFromIndexRecord(self.indexrecords[release][indexFile])
+                        d.callback(h)
                         return loadResult
         
         package = path.split('/')[-1].split('_')[0]
@@ -305,20 +305,25 @@ class AptPackages:
                 for verFile in version.FileList:
                     if self.records.Lookup(verFile):
                         if '/' + self.records.FileName == path:
-                            d.callback((self.records.SHA1Hash, size))
+                            h = HashObject()
+                            h.setFromPkgRecord(self.records, size)
+                            d.callback(h)
                             return loadResult
         except KeyError:
             pass
 
         # Check the source packages' files
-        self.srcrecords.Restart()
-        if self.srcrecords.Lookup(package):
-            for f in self.srcrecords.Files:
-                if path == '/' + f[2]:
-                    d.callback((f[0], f[1]))
-                    return loadResult
+        if self.srcrecords:
+            self.srcrecords.Restart()
+            if self.srcrecords.Lookup(package):
+                for f in self.srcrecords.Files:
+                    if path == '/' + f[2]:
+                        h = HashObject()
+                        h.setFromSrcRecord(f)
+                        d.callback(h)
+                        return loadResult
         
-        d.callback((None, None))
+        d.callback(HashObject())
         return loadResult
 
 class TestAptPackages(unittest.TestCase):
@@ -332,7 +337,7 @@ class TestAptPackages(unittest.TestCase):
     releaseFile = ''
     
     def setUp(self):
-        self.client = AptPackages('/tmp/.apt-dht')
+        self.client = AptPackages(FilePath('/tmp/.apt-dht'))
     
         self.packagesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Packages$" | tail -n 1').read().rstrip('\n')
         self.sourcesFile = os.popen('ls -Sr /var/lib/apt/lists/ | grep -E "_main_.*Sources$" | tail -n 1').read().rstrip('\n')
@@ -342,11 +347,11 @@ class TestAptPackages(unittest.TestCase):
                 break
         
         self.client.file_updated(self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/'), 
-                                 '/var/lib/apt/lists/' + self.releaseFile)
+                                 FilePath('/var/lib/apt/lists/' + self.releaseFile))
         self.client.file_updated(self.packagesFile[self.packagesFile.find('_dists_'):].replace('_','/'), 
-                                 '/var/lib/apt/lists/' + self.packagesFile)
+                                 FilePath('/var/lib/apt/lists/' + self.packagesFile))
         self.client.file_updated(self.sourcesFile[self.sourcesFile.find('_dists_'):].replace('_','/'), 
-                                 '/var/lib/apt/lists/' + self.sourcesFile)
+                                 FilePath('/var/lib/apt/lists/' + self.sourcesFile))
     
     def test_pkg_hash(self):
         self.client._load()
@@ -377,7 +382,7 @@ class TestAptPackages(unittest.TestCase):
     def test_index_hash(self):
         self.client._load()
 
-        indexhash = self.client.indexrecords[self.releaseFile[self.releaseFile.find('_dists_')-6:].replace('_','/')]['main/binary-i386/Packages.bz2']['SHA1'][0]
+        indexhash = self.client.indexrecords[self.releaseFile[self.releaseFile.find('_dists_'):].replace('_','/')]['main/binary-i386/Packages.bz2']['SHA1'][0]
 
         idx_hash = os.popen('grep -A 3000 -E "^SHA1:" ' + 
                             '/var/lib/apt/lists/' + self.releaseFile + 
@@ -387,8 +392,8 @@ class TestAptPackages(unittest.TestCase):
         self.failUnless(indexhash == idx_hash, "Hashes don't match: %s != %s" % (indexhash, idx_hash))
 
     def verifyHash(self, found_hash, path, true_hash):
-        self.failUnless(found_hash[0] == true_hash, 
-                    "%s hashes don't match: %s != %s" % (path, found_hash[0], true_hash))
+        self.failUnless(found_hash.hexexpected() == true_hash, 
+                    "%s hashes don't match: %s != %s" % (path, found_hash.hexexpected(), true_hash))
 
     def test_findIndexHash(self):
         lastDefer = defer.Deferred()
@@ -491,7 +496,7 @@ class TestAptPackages(unittest.TestCase):
                             '/var/lib/apt/lists/' + self.releaseFile + 
                             ' | grep -E " main/source/Sources.bz2$"'
                             ' | head -n 1 | cut -d\  -f 2').read().rstrip('\n')
-        idx_path = self.releaseFile[self.releaseFile.find('_dists_')-6:].replace('_','/')[:-7] + 'main/source/Sources.bz2'
+        idx_path = '/' + self.releaseFile[self.releaseFile.find('_dists_')+1:].replace('_','/')[:-7] + 'main/source/Sources.bz2'
 
         d = self.client.findHash(idx_path)
         d.addCallback(self.verifyHash, idx_path, idx_hash)