from time import sleep
import os
+from twisted.python.filepath import FilePath
from twisted.trial import unittest
# Guard against too-old bindings: the schema/features below need version 2.1+.
# NOTE(review): `sqlite` is imported in elided diff context — presumably the
# pysqlite2 module; confirm against the full file's imports.
assert sqlite.version_info >= (2, 1)
def __init__(self, db):
    """Load an existing database file, or create a fresh one.

    @param db: the FilePath of the database file
    """
    self.db = db
    # Drop any cached stat info so the existence check is current.
    self.db.restat(False)
    if not self.db.exists():
        self._createNewDB()
    else:
        self._loadDB()
    self.conn.text_factory = str
    self.conn.row_factory = sqlite.Row
def _loadDB(self):
    """Open a connection to the existing database file.

    @raise DBExcept: if the database could not be opened
    """
    try:
        self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
    except Exception:
        import traceback
        # BUG FIX: the old three-argument raise passed format_exc() (a string)
        # where Python 2 requires a traceback object or None, which would
        # itself blow up with a TypeError.  Fold the traceback text into the
        # exception message instead, and avoid a bare `except:`.
        raise DBExcept("Couldn't open DB: %s" % traceback.format_exc())
def _createNewDB(self):
    """Create a new database file, its parent directory, tables and indexes."""
    parent = self.db.parent()
    if not parent.exists():
        parent.makedirs()
    self.conn = sqlite.connect(database=self.db.path, detect_types=sqlite.PARSE_DECLTYPES)
    cursor = self.conn.cursor()
    # One row per tracked file; many files may share a single hash row.
    cursor.execute("CREATE TABLE files (path TEXT PRIMARY KEY UNIQUE, hashID INTEGER, " +
                   "size NUMBER, mtime NUMBER)")
    cursor.execute("CREATE TABLE hashes (hashID INTEGER PRIMARY KEY AUTOINCREMENT, " +
                   "hash KHASH UNIQUE, pieces KHASH, " +
                   "piecehash KHASH, refreshed TIMESTAMP)")
    cursor.execute("CREATE INDEX hashes_refreshed ON hashes(refreshed)")
    cursor.execute("CREATE INDEX hashes_piecehash ON hashes(piecehash)")
    cursor.close()
    self.conn.commit()
def _removeChanged(self, file, row):
    """Compare a database row against the file system, dropping stale entries.

    @param file: the FilePath the row describes
    @param row: the matching database row (with 'size' and 'mtime'), or None
    @return: True if the file is unchanged, False if it has changed
        (the entry is removed), None if there was no row
    """
    unchanged = None
    if row:
        file.restat(False)
        if file.exists():
            # Unchanged only if both the size and the mtime still match.
            unchanged = (row['size'] == file.getsize() and row['mtime'] == file.getmtime())
    if not unchanged:
        cursor = self.conn.cursor()
        cursor.execute("DELETE FROM files WHERE path = ?", (file.path, ))
        self.conn.commit()
        cursor.close()
    return unchanged
def storeFile(self, file, hash, pieces = ''):
    """Store or update a file in the database.

    @param file: the FilePath of the file to store
    @param hash: the hash of the file's contents
    @param pieces: the concatenated piece hashes of the file (optional)
    @return: True if the hash was not in the database before
        (so it needs to be added to the DHT)
    """
    piecehash = ''
    if pieces:
        # BUG FIX: `sha.new().update(pieces)` returns None, and `digest()`
        # was then called on the sha *module* -- an AttributeError whenever
        # pieces was non-empty.  Hash the piece string properly.
        s = sha.new()
        s.update(pieces)
        piecehash = s.digest()
    c = self.conn.cursor()
    c.execute("SELECT hashID, piecehash FROM hashes WHERE hash = ?", (khash(hash), ))
    row = c.fetchone()
    if row:
        # Hash already known: sanity-check that the piece data agrees.
        assert piecehash == row['piecehash']
        new_hash = False
        hashID = row['hashID']
    else:
        # First time this hash is seen: insert it and remember the new row ID.
        # (The redundant second cursor creation was removed -- `c` is reused.)
        c.execute("INSERT OR REPLACE INTO hashes (hash, pieces, piecehash, refreshed) VALUES (?, ?, ?, ?)",
                  (khash(hash), khash(pieces), khash(piecehash), datetime.now()))
        self.conn.commit()
        new_hash = True
        hashID = c.lastrowid

    # Force a fresh stat so the recorded size/mtime are current.
    file.restat()
    c.execute("INSERT OR REPLACE INTO files (path, hashID, size, mtime) VALUES (?, ?, ?, ?)",
              (file.path, hashID, file.getsize(), file.getmtime()))
    self.conn.commit()
    c.close()
    return new_hash
def getFile(self, file):
    """Get a file from the database.

    If it has changed or is missing, it is removed from the database.

    @return: dictionary of info for the file, False if changed, or
        None if not in database or missing
    """
    cursor = self.conn.cursor()
    cursor.execute("SELECT hash, size, mtime, pieces FROM files JOIN hashes USING (hashID) WHERE path = ?", (file.path, ))
    row = cursor.fetchone()
    result = None
    if row:
        # _removeChanged yields True (unchanged), False (changed), or None.
        result = self._removeChanged(file, row)
        if result:
            result = {'hash': row['hash'],
                      'size': row['size'],
                      'pieces': row['pieces']}
    cursor.close()
    return result
def lookupHash(self, hash, filesOnly = False):
    """Find a file by hash in the database.

    If any found files have changed or are missing, they are removed
    from the database. If filesOnly is False then it will also look for
    piece string hashes if no files can be found.

    @return: list of dictionaries of info for the found files
    """
    cursor = self.conn.cursor()
    cursor.execute("SELECT path, size, mtime, refreshed, pieces FROM files JOIN hashes USING (hashID) WHERE hash = ?", (khash(hash), ))
    found = []
    row = cursor.fetchone()
    while row:
        file = FilePath(row['path'])
        # Drop entries whose backing file changed or disappeared.
        if self._removeChanged(file, row):
            found.append({'path': file,
                          'size': row['size'],
                          'refreshed': row['refreshed'],
                          'pieces': row['pieces']})
        row = cursor.fetchone()

    if not filesOnly and not found:
        # No whole-file match: fall back to matching the piece-string hash.
        cursor.execute("SELECT refreshed, pieces, piecehash FROM hashes WHERE piecehash = ?", (khash(hash), ))
        row = cursor.fetchone()
        if row:
            found.append({'refreshed': row['refreshed'],
                          'pieces': row['pieces']})

    cursor.close()
    return found
def isUnchanged(self, file):
    """Check if a file in the file system has changed.

    If it has changed, it is removed from the table.

    @return: True if unchanged, False if changed, None if not in database
    """
    cursor = self.conn.cursor()
    cursor.execute("SELECT size, mtime FROM files WHERE path = ?", (file.path, ))
    # _removeChanged both answers the question and prunes stale rows.
    return self._removeChanged(file, cursor.fetchone())
def refreshHash(self, hash):
    """Refresh the publishing time of all files with a hash.

    @param hash: the hash whose refresh timestamp should be reset to now
    """
    c = self.conn.cursor()
    c.execute("UPDATE hashes SET refreshed = ? WHERE hash = ?", (datetime.now(), khash(hash)))
    # BUG FIX: commit like every other mutating method here -- without it the
    # update stays in an open transaction and is lost if the connection is
    # closed before some later method happens to commit.
    self.conn.commit()
    c.close()
def expiredHashes(self, expireAfter):
    """Find the hashes that need refreshing after expireAfter seconds.

    For each hash that needs refreshing, finds all the files with that hash.
    If a file has changed or is missing, it is removed from the table; if no
    valid files remain for a hash, the hash row itself is deleted.

    @param expireAfter: seconds after which a hash needs republishing
    @return: dictionary with keys the expired hashes, values a dictionary
        of info about the hash ('hashID', 'hash' and 'pieces')
        (DOC FIX: the old docstring claimed values were lists of FilePaths)
    """
    t = datetime.now() - timedelta(seconds=expireAfter)

    # First find the hashes that need refreshing.
    c = self.conn.cursor()
    c.execute("SELECT hashID, hash, pieces FROM hashes WHERE refreshed < ?", (t, ))
    row = c.fetchone()
    expired = {}
    while row:
        res = expired.setdefault(row['hash'], {})
        res['hashID'] = row['hashID']
        res['hash'] = row['hash']
        res['pieces'] = row['pieces']
        row = c.fetchone()

    # Make sure there are still valid files for each hash.  Iterate over a
    # snapshot: entries are deleted from `expired` inside the loop, which is
    # only accidentally safe on Python 2's list-returning .values().
    for hash in list(expired.values()):
        valid = False
        c.execute("SELECT path, size, mtime FROM files WHERE hashID = ?", (hash['hashID'], ))
        row = c.fetchone()
        while row:
            res = self._removeChanged(FilePath(row['path']), row)
            if res:
                valid = True
            row = c.fetchone()
        if not valid:
            # No tracked file remains: drop both the result and the hash row.
            del expired[hash['hash']]
            c.execute("DELETE FROM hashes WHERE hashID = ?", (hash['hashID'], ))

    self.conn.commit()
    c.close()

    return expired
def removeUntrackedFiles(self, dirs):
# Removes (and returns) database entries for files that live outside the
# given directories.
# NOTE(review): this is a diff hunk and the lines between building `sql` and
# the first `c.fetchone()` are elided context -- the cursor creation and the
# SELECT (and presumably the DELETE before the commit) are not visible here,
# so this view of the method is incomplete; confirm against the full file.
newdirs = []
sql = "WHERE"
for dir in dirs:
# Build one GLOB pattern per kept directory: everything under it is tracked.
- newdirs.append(os.path.abspath(dir) + os.sep + '*')
+ newdirs.append(dir.child('*').path)
sql += " path NOT GLOB ? AND"
# Strip the trailing " AND" from the assembled WHERE clause.
sql = sql[:-4]
row = c.fetchone()
removed = []
while row:
- removed.append(row['path'])
+ removed.append(FilePath(row['path']))
row = c.fetchone()
if removed:
self.conn.commit()
return removed
def close(self):
    """Close the connection to the database."""
    self.conn.close()
"""Tests for the khashmir database."""
timeout = 5
- db = '/tmp/khashmir.db'
- path = '/tmp/khashmir.test'
+ db = FilePath('/tmp/khashmir.db')
hash = '\xca\xec\xb8\x0c\x00\xe7\x07\xf8~])\x8f\x9d\xe5_B\xff\x1a\xc4!'
- directory = '/tmp/'
- urlpath = '/~1/khashmir.test'
- dirs = ['/tmp/apt-dht/top1', '/tmp/apt-dht/top2/sub1', '/tmp/apt-dht/top2/sub2/']
+ directory = FilePath('/tmp/apt-dht/')
+ file = FilePath('/tmp/apt-dht/khashmir.test')
+ testfile = 'tmp/khashmir.test'
+ dirs = [FilePath('/tmp/apt-dht/top1'),
+ FilePath('/tmp/apt-dht/top2/sub1'),
+ FilePath('/tmp/apt-dht/top2/sub2/')]
def setUp(self):
    """Create the test file and a database that tracks it."""
    parent = self.file.parent()
    if not parent.exists():
        parent.makedirs()
    self.file.setContent('fgfhds')
    self.file.touch()
    self.store = DB(self.db)
    self.store.storeFile(self.file, self.hash)
def test_openExistingDB(self):
    """The database can be closed and reopened without losing state."""
    self.store.close()
    self.store = None
    sleep(1)
    self.store = DB(self.db)
    res = self.store.isUnchanged(self.file)
    self.failUnless(res)
def test_getFile(self):
    """A stored file can be fetched and reports the right hash."""
    info = self.store.getFile(self.file)
    self.failUnless(info)
    self.failUnlessEqual(info['hash'], self.hash)
def test_lookupHash(self):
    """Looking up the hash finds exactly the one stored file."""
    found = self.store.lookupHash(self.hash)
    self.failUnless(found)
    self.failUnlessEqual(len(found), 1)
    self.failUnlessEqual(found[0]['path'].path, self.file.path)
def test_isUnchanged(self):
    """Touching the file invalidates its entry, which is then dropped."""
    res = self.store.isUnchanged(self.file)
    self.failUnless(res)
    sleep(2)
    self.file.touch()
    # The mtime changed, so the file is reported changed (and removed)...
    res = self.store.isUnchanged(self.file)
    self.failUnless(res == False)
    # ...so a second query no longer finds it in the database at all.
    res = self.store.isUnchanged(self.file)
    self.failUnless(res is None)
def test_expiry(self):
    """A hash expires after the timeout and can be refreshed again."""
    res = self.store.expiredHashes(1)
    self.failUnlessEqual(len(res.keys()), 0)
    sleep(2)
    res = self.store.expiredHashes(1)
    self.failUnlessEqual(len(res.keys()), 1)
    self.failUnlessEqual(res.keys()[0], self.hash)
    self.store.refreshHash(self.hash)
    res = self.store.expiredHashes(1)
    self.failUnlessEqual(len(res.keys()), 0)
def build_dirs(self):
    """Create and store a copy of the test file in each test directory."""
    for dir in self.dirs:
        copy = dir.preauthChild(self.testfile)
        if not copy.parent().exists():
            copy.parent().makedirs()
        # Use the path itself as content so each copy differs on disk.
        copy.setContent(copy.path)
        copy.touch()
        self.store.storeFile(copy, self.hash)
def test_multipleHashes(self):
    """Several files sharing one hash expire and refresh together."""
    self.build_dirs()
    res = self.store.expiredHashes(1)
    self.failUnlessEqual(len(res.keys()), 0)
    res = self.store.lookupHash(self.hash)
    self.failUnless(res)
    self.failUnlessEqual(len(res), 4)
    # All four files share one hash row, so one refresh timestamp.
    self.failUnlessEqual(res[0]['refreshed'], res[1]['refreshed'])
    self.failUnlessEqual(res[0]['refreshed'], res[2]['refreshed'])
    self.failUnlessEqual(res[0]['refreshed'], res[3]['refreshed'])
    sleep(2)
    res = self.store.expiredHashes(1)
    self.failUnlessEqual(len(res.keys()), 1)
    self.failUnlessEqual(res.keys()[0], self.hash)
    self.store.refreshHash(self.hash)
    res = self.store.expiredHashes(1)
    self.failUnlessEqual(len(res.keys()), 0)
def test_removeUntracked(self):
    """Untracking directories removes exactly the files outside them."""
    self.build_dirs()
    res = self.store.removeUntrackedFiles(self.dirs)
    self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res)
    self.failUnlessEqual(res[0], self.file, 'Got removed paths: %r' % res)
    # A second pass finds nothing more to remove.
    res = self.store.removeUntrackedFiles(self.dirs)
    self.failUnlessEqual(len(res), 0, 'Got removed paths: %r' % res)
    # Dropping the first directory removes its copy of the file.
    res = self.store.removeUntrackedFiles(self.dirs[1:])
    self.failUnlessEqual(len(res), 1, 'Got removed paths: %r' % res)
    self.failUnlessEqual(res[0], self.dirs[0].preauthChild(self.testfile), 'Got removed paths: %r' % res)
    # Keeping only the (now empty) first directory removes the other two.
    res = self.store.removeUntrackedFiles(self.dirs[:1])
    self.failUnlessEqual(len(res), 2, 'Got removed paths: %r' % res)
    self.failUnlessIn(self.dirs[1].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
    self.failUnlessIn(self.dirs[2].preauthChild(self.testfile), res, 'Got removed paths: %r' % res)
def tearDown(self):
    """Remove the test directory tree and the database file."""
    self.directory.remove()
    self.store.close()
    self.db.remove()