From: Cameron Dale Date: Mon, 21 Apr 2008 02:42:31 +0000 (-0700) Subject: Add all files to the DB with their hashes. X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=458fed367aab6b449d6a59d992fa164f7cc6579d;p=quix0rs-apt-p2p.git Add all files to the DB with their hashes. Non-DHT files are marked as such. HTTP server looks up the cache path to decide whether to return a file. --- diff --git a/TODO b/TODO index 7a586fe..72f9393 100644 --- a/TODO +++ b/TODO @@ -1,11 +1,3 @@ -Add all cache files to the database. - -All files in the cache should be added to the database, so that they can -be checked to make sure nothing has happened to them. The database would -then need a flag to indicate files that are hashed and available, but -that shouldn't be added to the DHT. - - Packages.diff files need to be considered. The Packages.diff/Index files contain hashes of Packages.diff/rred.gz diff --git a/apt_p2p/CacheManager.py b/apt_p2p/CacheManager.py index eda247d..6801e02 100644 --- a/apt_p2p/CacheManager.py +++ b/apt_p2p/CacheManager.py @@ -320,7 +320,7 @@ class CacheManager: url = 'http:/' + file.path[len(self.cache_dir.path):] # Store the hashed file in the database - new_hash = self.db.storeFile(file, result.digest(), + new_hash = self.db.storeFile(file, result.digest(), True, ''.join(result.pieceDigests())) # Tell the main program to handle the new cache file @@ -404,27 +404,31 @@ class CacheManager: @param decFile: the file where the decompressed download was written to (optional, defaults to the file not having been compressed) """ - if modtime: - os.utime(destFile.path, (modtime, modtime)) - if decFile: - os.utime(decFile.path, (modtime, modtime)) - result = hash.verify() if result or result is None: + if modtime: + os.utime(destFile.path, (modtime, modtime)) + if result: log.msg('Hashes match: %s' % url) + dht = True else: log.msg('Hashed file to %s: %s' % (hash.hexdigest(), url)) + dht = False - new_hash = self.db.storeFile(destFile, hash.digest(), + new_hash = self.db.storeFile(destFile, hash.digest(), dht, ''.join(hash.pieceDigests())) - log.msg('now avaliable: %s' % (url)) if self.manager: self.manager.new_cached_file(destFile, hash, new_hash, url) - if decFile: - ext_len = len(destFile.path) - len(decFile.path) - self.manager.new_cached_file(decFile, None, False, url[:-ext_len]) + + if decFile: + # Hash the decompressed file and add it to the DB + decHash = HashObject() + ext_len = len(destFile.path) - len(decFile.path) + df = decHash.hashInThread(decFile) + df.addCallback(self._save_complete, url[:-ext_len], decFile, modtime) + df.addErrback(self._save_error, url[:-ext_len], decFile) else: log.msg("Hashes don't match %s != %s: %s" % (hash.hexexpected(), hash.hexdigest(), url)) destFile.remove() diff --git a/apt_p2p/HTTPServer.py b/apt_p2p/HTTPServer.py index 5a5b00a..0c17d3f 100644 --- a/apt_p2p/HTTPServer.py +++ b/apt_p2p/HTTPServer.py @@ -30,9 +30,27 @@ class FileDownloader(static.File): def __init__(self, path, manager, defaultType="text/plain", ignoredExts=(), processors=None, indexNames=None): self.manager = manager super(FileDownloader, self).__init__(path, defaultType, ignoredExts, processors, indexNames) - + + def locateChild(self, req, segments): + child, segments = super(FileDownloader, self).locateChild(req, segments) + # Make sure we always call renderHTTP() + if isinstance(child, FileDownloader): + return child, segments + else: + return self, server.StopTraversal + def renderHTTP(self, req): log.msg('Got request for %s from %s' % (req.uri, req.remoteAddr)) + + # Make sure the file is in the DB and unchanged + if self.manager and not self.manager.db.isUnchanged(self.fp): + if self.fp.exists() and self.fp.isfile(): + self.fp.remove() + return self._renderHTTP_done(http.Response(404, + {'content-type': http_headers.MimeType('text', 'html')}, + '

File found but it has changed.'), + req) + resp = super(FileDownloader, self).renderHTTP(req) if isinstance(resp, defer.Deferred): resp.addCallbacks(self._renderHTTP_done, self._renderHTTP_error, diff --git a/apt_p2p/db.py b/apt_p2p/db.py index 44e692b..f72a326 100644 --- a/apt_p2p/db.py +++ b/apt_p2p/db.py @@ -65,7 +65,7 @@ class DB: self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES) c = self.conn.cursor() c.execute("CREATE TABLE files (path TEXT PRIMARY KEY UNIQUE, hashID INTEGER, " + - "size NUMBER, mtime NUMBER)") + "dht BOOL, size NUMBER, mtime NUMBER)") c.execute("CREATE TABLE hashes (hashID INTEGER PRIMARY KEY AUTOINCREMENT, " + "hash KHASH UNIQUE, pieces KHASH, " + "piecehash KHASH, refreshed TIMESTAMP)") @@ -106,13 +106,15 @@ class DB: c.close() return res - def storeFile(self, file, hash, pieces = ''): + def storeFile(self, file, hash, dht = True, pieces = ''): """Store or update a file in the database. @type file: L{twisted.python.filepath.FilePath} @param file: the file to check @type hash: C{string} @param hash: the hash of the file + @param dht: whether the file is added to the DHT + (optional, defaults to true) @type pieces: C{string} @param pieces: the concatenated list of the hashes of the pieces of the file (optional, defaults to the empty string) @@ -143,8 +145,8 @@ class DB: # Add the file to the database file.restat() - c.execute("INSERT OR REPLACE INTO files (path, hashID, size, mtime) VALUES (?, ?, ?, ?)", - (file.path, hashID, file.getsize(), file.getmtime())) + c.execute("INSERT OR REPLACE INTO files (path, hashID, dht, size, mtime) VALUES (?, ?, ?, ?, ?)", + (file.path, hashID, dht, file.getsize(), file.getmtime())) self.conn.commit() c.close() @@ -254,20 +256,30 @@ class DB: res['pieces'] = row['pieces'] row = c.fetchone() - # Make sure there are still valid files for each hash + # Make sure there are still valid DHT files for each hash for hash in expired.values(): - valid = False - c.execute("SELECT path, size, mtime FROM files WHERE hashID = ?", (hash['hashID'], )) + dht = False + non_dht = False + c.execute("SELECT path, dht, size, mtime FROM files WHERE hashID = ?", (hash['hashID'], )) row = c.fetchone() while row: res = self._removeChanged(FilePath(row['path']), row) if res: - valid = True + if row['dht']: + dht = True + else: + non_dht = True row = c.fetchone() - if not valid: - # Remove hashes for which no files are still available + if not dht: + # Remove hashes for which no DHT files are still available del expired[hash['hash']] - c.execute("DELETE FROM hashes WHERE hashID = ?", (hash['hashID'], )) + if not non_dht: + # Remove hashes for which no files are still available + c.execute("DELETE FROM hashes WHERE hashID = ?", (hash['hashID'], )) + else: + # There are still some non-DHT files available, so refresh them + c.execute("UPDATE hashes SET refreshed = ? WHERE hashID = ?", + (datetime.now(), hash['hashID'])) self.conn.commit() c.close()