Add all files to the DB with their hashes.

author Cameron Dale <camrdale@gmail.com>
Mon, 21 Apr 2008 02:42:31 +0000 (19:42 -0700)
committer Cameron Dale <camrdale@gmail.com>
Mon, 21 Apr 2008 02:42:31 +0000 (19:42 -0700)
diff --git a/TODO b/TODO

index 7a586fe9a2c4b15861764d4b103f9bcd22a2f5f0..72f93931b6dc9560ebdd1da2a5f2e30f6ef9c37c 100644 (file)
--- a/TODO
+++ b/TODO
@@ -1,11 +1,3 @@
-Add all cache files to the database.
-
-All files in the cache should be added to the database, so that they can
-be checked to make sure nothing has happened to them. The database would
-then need a flag to indicate files that are hashed and available, but
-that shouldn't be added to the DHT.
-
-
  Packages.diff files need to be considered.
  
  The Packages.diff/Index files contain hashes of Packages.diff/rred.gz 
  Packages.diff files need to be considered.
  
  The Packages.diff/Index files contain hashes of Packages.diff/rred.gz 
diff --git a/apt_p2p/CacheManager.py b/apt_p2p/CacheManager.py

index eda247d61335a39318521b1838b554c97e1a6010..6801e023d6e152e981e1021c3e1ee47e53ca9de9 100644 (file)
--- a/apt_p2p/CacheManager.py
+++ b/apt_p2p/CacheManager.py
@@ -320,7 +320,7 @@ class CacheManager:
                  url = 'http:/' + file.path[len(self.cache_dir.path):]
                  
              # Store the hashed file in the database
                  url = 'http:/' + file.path[len(self.cache_dir.path):]
                  
              # Store the hashed file in the database
-            new_hash = self.db.storeFile(file, result.digest(),
+            new_hash = self.db.storeFile(file, result.digest(), True,
                                           ''.join(result.pieceDigests()))
              
              # Tell the main program to handle the new cache file
                                           ''.join(result.pieceDigests()))
              
              # Tell the main program to handle the new cache file
@@ -404,27 +404,31 @@ class CacheManager:
          @param decFile: the file where the decompressed download was written to
              (optional, defaults to the file not having been compressed)
          """
          @param decFile: the file where the decompressed download was written to
              (optional, defaults to the file not having been compressed)
          """
-        if modtime:
-            os.utime(destFile.path, (modtime, modtime))
-            if decFile:
-                os.utime(decFile.path, (modtime, modtime))
-        
          result = hash.verify()
          if result or result is None:
          result = hash.verify()
          if result or result is None:
+            if modtime:
+                os.utime(destFile.path, (modtime, modtime))
+            
              if result:
                  log.msg('Hashes match: %s' % url)
              if result:
                  log.msg('Hashes match: %s' % url)
+                dht = True
              else:
                  log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))
              else:
                  log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url))
+                dht = False
                  
                  
-            new_hash = self.db.storeFile(destFile, hash.digest(),
+            new_hash = self.db.storeFile(destFile, hash.digest(), dht,
                                           ''.join(hash.pieceDigests()))
                                           ''.join(hash.pieceDigests()))
-            log.msg('now avaliable: %s' % (url))
  
              if self.manager:
                  self.manager.new_cached_file(destFile, hash, new_hash, url)
  
              if self.manager:
                  self.manager.new_cached_file(destFile, hash, new_hash, url)
-                if decFile:
-                    ext_len = len(destFile.path) - len(decFile.path)
-                    self.manager.new_cached_file(decFile, None, False, url[:-ext_len])
+
+            if decFile:
+                # Hash the decompressed file and add it to the DB
+                decHash = HashObject()
+                ext_len = len(destFile.path) - len(decFile.path)
+                df = decHash.hashInThread(decFile)
+                df.addCallback(self._save_complete, url[:-ext_len], decFile, modtime)
+                df.addErrback(self._save_error, url[:-ext_len], decFile)
          else:
              log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url))
              destFile.remove()
          else:
              log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url))
              destFile.remove()
diff --git a/apt_p2p/HTTPServer.py b/apt_p2p/HTTPServer.py

index 5a5b00a92cc395a374d51d8b207697967968ff9b..0c17d3fd8f6c5b34c2e4e70bbf7eadbf41d05a23 100644 (file)
--- a/apt_p2p/HTTPServer.py
+++ b/apt_p2p/HTTPServer.py
@@ -30,9 +30,27 @@ class FileDownloader(static.File):
      def __init__(self, path, manager, defaultType="text/plain", ignoredExts=(), processors=None, indexNames=None):
          self.manager = manager
          super(FileDownloader, self).__init__(path, defaultType, ignoredExts, processors, indexNames)
      def __init__(self, path, manager, defaultType="text/plain", ignoredExts=(), processors=None, indexNames=None):
          self.manager = manager
          super(FileDownloader, self).__init__(path, defaultType, ignoredExts, processors, indexNames)
-        
+    
+    def locateChild(self, req, segments):
+        child, segments = super(FileDownloader, self).locateChild(req, segments)
+        # Make sure we always call renderHTTP()
+        if isinstance(child, FileDownloader):
+            return child, segments
+        else:
+            return self, server.StopTraversal
+            
      def renderHTTP(self, req):
          log.msg('Got request for %s from %s' % (req.uri, req.remoteAddr))
      def renderHTTP(self, req):
          log.msg('Got request for %s from %s' % (req.uri, req.remoteAddr))
+        
+        # Make sure the file is in the DB and unchanged
+        if self.manager and not self.manager.db.isUnchanged(self.fp):
+            if self.fp.exists() and self.fp.isfile():
+                self.fp.remove()
+            return self._renderHTTP_done(http.Response(404,
+                        {'content-type': http_headers.MimeType('text', 'html')},
+                        '<html><body><p>File found but it has changed.</body></html>'),
+                        req)
+            
          resp = super(FileDownloader, self).renderHTTP(req)
          if isinstance(resp, defer.Deferred):
              resp.addCallbacks(self._renderHTTP_done, self._renderHTTP_error,
          resp = super(FileDownloader, self).renderHTTP(req)
          if isinstance(resp, defer.Deferred):
              resp.addCallbacks(self._renderHTTP_done, self._renderHTTP_error,
diff --git a/apt_p2p/db.py b/apt_p2p/db.py

index 44e692b416d92e47122deee6ca9fc9788bdd697e..f72a3268603d2e020e73ecbe8efea75eb33fe596 100644 (file)
--- a/apt_p2p/db.py
+++ b/apt_p2p/db.py
@@ -65,7 +65,7 @@ class DB:
          self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
          c = self.conn.cursor()
          c.execute("CREATE TABLE files (path TEXT PRIMARY KEY UNIQUE, hashID INTEGER, " +
          self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
          c = self.conn.cursor()
          c.execute("CREATE TABLE files (path TEXT PRIMARY KEY UNIQUE, hashID INTEGER, " +
-                                      "size NUMBER, mtime NUMBER)")
+                                      "dht BOOL, size NUMBER, mtime NUMBER)")
          c.execute("CREATE TABLE hashes (hashID INTEGER PRIMARY KEY AUTOINCREMENT, " +
                                         "hash KHASH UNIQUE, pieces KHASH, " +
                                         "piecehash KHASH, refreshed TIMESTAMP)")
          c.execute("CREATE TABLE hashes (hashID INTEGER PRIMARY KEY AUTOINCREMENT, " +
                                         "hash KHASH UNIQUE, pieces KHASH, " +
                                         "piecehash KHASH, refreshed TIMESTAMP)")
@@ -106,13 +106,15 @@ class DB:
                  c.close()
          return res
          
                  c.close()
          return res
          
-    def storeFile(self, file, hash, pieces = ''):
+    def storeFile(self, file, hash, dht = True, pieces = ''):
          """Store or update a file in the database.
          
          @type file: L{twisted.python.filepath.FilePath}
          @param file: the file to check
          @type hash: C{string}
          @param hash: the hash of the file
          """Store or update a file in the database.
          
          @type file: L{twisted.python.filepath.FilePath}
          @param file: the file to check
          @type hash: C{string}
          @param hash: the hash of the file
+        @param dht: whether the file is added to the DHT
+            (optional, defaults to true)
          @type pieces: C{string}
          @param pieces: the concatenated list of the hashes of the pieces of
              the file (optional, defaults to the empty string)
          @type pieces: C{string}
          @param pieces: the concatenated list of the hashes of the pieces of
              the file (optional, defaults to the empty string)
@@ -143,8 +145,8 @@ class DB:
  
          # Add the file to the database
          file.restat()
  
          # Add the file to the database
          file.restat()
-        c.execute("INSERT OR REPLACE INTO files (path, hashID, size, mtime) VALUES (?, ?, ?, ?)",
-                  (file.path, hashID, file.getsize(), file.getmtime()))
+        c.execute("INSERT OR REPLACE INTO files (path, hashID, dht, size, mtime) VALUES (?, ?, ?, ?, ?)",
+                  (file.path, hashID, dht, file.getsize(), file.getmtime()))
          self.conn.commit()
          c.close()
          
          self.conn.commit()
          c.close()
          
@@ -254,20 +256,30 @@ class DB:
              res['pieces'] = row['pieces']
              row = c.fetchone()
  
              res['pieces'] = row['pieces']
              row = c.fetchone()
  
-        # Make sure there are still valid files for each hash
+        # Make sure there are still valid DHT files for each hash
          for hash in expired.values():
          for hash in expired.values():
-            valid = False
-            c.execute("SELECT path, size, mtime FROM files WHERE hashID = ?", (hash['hashID'], ))
+            dht = False
+            non_dht = False
+            c.execute("SELECT path, dht, size, mtime FROM files WHERE hashID = ?", (hash['hashID'], ))
              row = c.fetchone()
              while row:
                  res = self._removeChanged(FilePath(row['path']), row)
                  if res:
              row = c.fetchone()
              while row:
                  res = self._removeChanged(FilePath(row['path']), row)
                  if res:
-                    valid = True
+                    if row['dht']:
+                        dht = True
+                    else:
+                        non_dht = True
                  row = c.fetchone()
                  row = c.fetchone()
-            if not valid:
-                # Remove hashes for which no files are still available
+            if not dht:
+                # Remove hashes for which no DHT files are still available
                  del expired[hash['hash']]
                  del expired[hash['hash']]
-                c.execute("DELETE FROM hashes WHERE hashID = ?", (hash['hashID'], ))
+                if not non_dht:
+                    # Remove hashes for which no files are still available
+                    c.execute("DELETE FROM hashes WHERE hashID = ?", (hash['hashID'], ))
+                else:
+                    # There are still some non-DHT files available, so refresh them
+                    c.execute("UPDATE hashes SET refreshed = ? WHERE hashID = ?",
+                              (datetime.now(), hash['hashID']))
                  
          self.conn.commit()
          c.close()
                  
          self.conn.commit()
          c.close()
author	Cameron Dale <camrdale@gmail.com>
	Mon, 21 Apr 2008 02:42:31 +0000 (19:42 -0700)
committer	Cameron Dale <camrdale@gmail.com>
	Mon, 21 Apr 2008 02:42:31 +0000 (19:42 -0700)
TODO		patch \| blob \| history
apt_p2p/CacheManager.py		patch \| blob \| history
apt_p2p/HTTPServer.py		patch \| blob \| history
apt_p2p/db.py		patch \| blob \| history