2 """Hash and store hash information for a file.
4 @var PIECE_SIZE: the piece size to use for hashing pieces of files
8 from binascii import b2a_hex, a2b_hex
11 from twisted.internet import threads, defer
12 from twisted.trial import unittest
# Exception type used by the hashing code in this file (raised for an
# uninitialised hasher, for adding data after completion, and passed to an
# errback for a missing file). It subclasses ValueError, so handlers written
# for ValueError also catch it.
16 class HashError(ValueError):
17 """An error has occurred while hashing a file."""
# Class docstring and ORDER table of the hash-managing class whose methods
# follow. NOTE(review): the `class` statement and several table lines (entry
# openers, closing brackets) are not visible in this excerpt.
#
# Keys visible in the ORDER entries and where this file reads them:
#   'name'           - compared against hash names such as 'sha1' and 'sha256'
#   'AptPkgRecord'   - attribute name fetched from the record in setFromPkgRecord()
#   'AptSrcRecord'   - truthiness decides whether setFromSrcRecord() uses the entry
#   'AptIndexRecord' - key looked up in the record dict in setFromIndexRecord()
#   'hashlib_func'   - attribute name fetched from hashlib when creating a hasher
# An 'old_module' key is also read when sys.version_info < (2, 5), but no such
# key is visible in this excerpt - TODO confirm against the full table.
20 """Manages hashes and hashing for a file.
22 @ivar ORDER: the priority ordering of hashes, and how to extract them
26 ORDER = [ {'name': 'sha1',
28 'AptPkgRecord': 'SHA1Hash',
29 'AptSrcRecord': False,
30 'AptIndexRecord': 'SHA1',
32 'hashlib_func': 'sha1',
36 'AptPkgRecord': 'SHA256Hash',
37 'AptSrcRecord': False,
38 'AptIndexRecord': 'SHA256',
39 'hashlib_func': 'sha256',
43 'AptPkgRecord': 'MD5Hash',
45 'AptIndexRecord': 'MD5SUM',
47 'hashlib_func': 'md5',
# Constructor: resets the hashing state and optionally preloads a known result.
#   digest - stored as-is in self.fileHash (None means "not hashed yet").
#   size   - not used on any line visible here; presumably stored as self.size
#            on a line missing from this excerpt - TODO confirm.
#   pieces - concatenated piece digests, split below into 20-character slices.
# NOTE(review): self.result, self.size, self.expHash, self.expHex, self.expSize
# and self.fileHex are read by other methods, but their initialisation is not
# visible in this excerpt.
51 def __init__(self, digest = None, size = None, pieces = ''):
52 """Initialize the hash object."""
53 self.hashTypeNum = 0 # Use the first if nothing else matters
54 if sys.version_info < (2, 5):
55 # sha256 is not available in python before 2.5, remove it
56 for hashType in self.ORDER:
57 if hashType['name'] == 'sha256':
# This deletes from the same list that is being iterated, and the list is
# reached through self.ORDER, so the removal is visible to anything else that
# shares that list. Whether the loop exits right after the delete is not
# visible here - TODO confirm.
58 del self.ORDER[self.ORDER.index(hashType)]
64 self.expNormHash = None
65 self.fileHasher = None
66 self.pieceHasher = None
67 self.fileHash = digest
# Slice width 20 matches the digest length of SHA1, the algorithm used for
# piece hashing elsewhere in this file. xrange makes this Python 2 only.
68 self.pieceHash = [pieces[x:x+20] for x in xrange(0, len(pieces), 20)]
71 self.fileNormHash = None
# Prepares the object for hashing a fresh stream of data: creates the file
# hasher via self._new() and decides whether a separate piece hasher is needed.
# Nothing is reset unless no verification result exists yet or force is true.
# NOTE(review): several lines of this method (docstring terminator, other
# state resets, and presumably an `else:` before the _newSHA1() call) are not
# visible in this excerpt.
76 def new(self, force = False):
77 """Generate a new hashing object suitable for hashing a file.
79 @param force: set to True to force creating a new object even if
80 the hash has been verified already
82 if self.result is None or force:
85 self.fileHasher = self._new()
# When the file hash is itself sha1, no piece hasher is created here;
# update() creates one only once the data crosses the first PIECE_SIZE boundary.
86 if self.ORDER[self.hashTypeNum]['name'] == 'sha1':
87 self.pieceHasher = None
# For any other hash type a dedicated SHA1 piece hasher is started.
89 self.pieceHasher = self._newSHA1()
95 self.fileNormHash = None
# Body of the helper that new() calls as self._new(); its `def` line is not
# visible in this excerpt. It builds a hasher for the currently selected
# ORDER entry (self.hashTypeNum).
98 """Create a new hashing object according to the hash type."""
99 if sys.version_info < (2, 5):
# Before Python 2.5: import the legacy module named by the entry's
# 'old_module' key. How `mod` is then used is not visible here.
100 mod = __import__(self.ORDER[self.hashTypeNum]['old_module'], globals(), locals(), [])
# Otherwise: look up the constructor on hashlib by the entry's 'hashlib_func'
# name. NOTE(review): the hashlib import, the call of `func` and the return
# are presumably on lines missing from this excerpt - confirm.
104 func = getattr(hashlib, self.ORDER[self.hashTypeNum]['hashlib_func'])
# Body of the helper called elsewhere as self._newSHA1(); its `def` line is
# not visible in this excerpt. It returns a fresh SHA1 hasher, which this file
# uses for per-piece digests regardless of the file hash type.
108 """Create a new SHA1 hashing object."""
# NOTE(review): the body of this pre-2.5 branch is not visible here.
109 if sys.version_info < (2, 5):
114 return hashlib.sha1()
# Feeds `data` into the whole-file hasher and, once the stream exceeds
# PIECE_SIZE, into per-piece SHA1 hashers. The digest of each completed piece
# is appended to self.pieceHash, and self.size tracks the bytes hashed so far.
# Raises HashError if the file hasher has not been created, and with the
# "Already done" message (its guarding condition is not visible here).
# Nothing is hashed once self.result is set.
# The `raise X, "msg"` form is Python 2-only syntax.
# NOTE(review): the lines that assign self.pieceSize (initialisation and the
# reset after each completed piece) and the guard around the piece-hasher
# section are not visible in this excerpt - confirm against the full file.
116 def update(self, data):
117 """Add more data to the file hasher."""
118 if self.result is None:
120 raise HashError, "Already done, you can't add more data after calling digest() or verify()"
121 if self.fileHasher is None:
122 raise HashError, "file hasher not initialized"
# First boundary crossing with no piece hasher yet: hash exactly up to
# PIECE_SIZE with the file hasher and keep the remainder in `data`.
124 if not self.pieceHasher and self.size + len(data) > PIECE_SIZE:
125 # Hash up to the piece size
126 self.fileHasher.update(data[:(PIECE_SIZE - self.size)])
127 data = data[(PIECE_SIZE - self.size):]
128 self.size = PIECE_SIZE
# The file hasher's digest at exactly PIECE_SIZE doubles as the first piece
# digest. new() leaves pieceHasher unset only for sha1, so this is a SHA1.
131 # Save the first piece digest and initialize a new piece hasher
132 self.pieceHash.append(self.fileHasher.digest())
133 self.pieceHasher = self._newSHA1()
136 # Loop in case the data contains multiple pieces
137 while self.pieceSize + len(data) > PIECE_SIZE:
138 # Save the piece hash and start a new one
139 self.pieceHasher.update(data[:(PIECE_SIZE - self.pieceSize)])
140 self.pieceHash.append(self.pieceHasher.digest())
141 self.pieceHasher = self._newSHA1()
# The same slice is fed to the file hasher before `data` is trimmed, so both
# hashers see identical bytes.
143 # Don't forget to hash the data normally
144 self.fileHasher.update(data[:(PIECE_SIZE - self.pieceSize)])
145 data = data[(PIECE_SIZE - self.pieceSize):]
146 self.size += PIECE_SIZE - self.pieceSize
149 # Hash any remaining data
150 self.pieceHasher.update(data)
151 self.pieceSize += len(data)
# Whatever is left of `data` (less than a full piece) goes to the file hasher.
153 self.fileHasher.update(data)
154 self.size += len(data)
# Asynchronous entry point for hashing a file.
#   file - must provide exists(); presumably a twisted FilePath - TODO confirm.
# If the file does not exist, a Deferred is created and immediately errbacked
# with HashError("file not found"). Otherwise self._hashInThread(file) is
# handed to threads.deferToThread. NOTE(review): the return statements are
# not visible in this excerpt.
156 def hashInThread(self, file):
157 """Hashes a file in a separate thread, returning a deferred that will callback with the result."""
159 if not file.exists():
160 df = defer.Deferred()
161 df.errback(HashError("file not found"))
164 df = threads.deferToThread(self._hashInThread, file)
# Worker passed to deferToThread by hashInThread(). It starts by forcing a
# fresh hashing state, so any previous result is discarded.
# NOTE(review): the lines that read the file, feed update() and return are
# not visible in this excerpt.
167 def _hashInThread(self, file):
168 """Hashes a file, returning itself as the result."""
170 self.new(force = True)
178 #{ Checking hashes of data
# Returns self.pieceHash, the list of per-piece digests (the same list object,
# not a copy). NOTE(review): a line between the docstring and the return is
# not visible here; the tests call this before digest() and still expect the
# final piece, so it presumably finalises the hash first - confirm.
179 def pieceDigests(self):
180 """Get the piece hashes of the added file data."""
182 return self.pieceHash
# Body of the method called elsewhere as digest(); its `def` line is not
# visible in this excerpt. On first use it finalises the file hasher, caches
# the result in self.fileHash, and appends the last piece digest to
# self.pieceHash. Raises HashError if no hasher was ever created.
# The `raise X, "msg"` form is Python 2-only syntax.
# NOTE(review): the guard around the final piece append and the return
# statement are not visible here.
185 """Get the hash of the added file data."""
186 if self.fileHash is None:
187 if self.fileHasher is None:
188 raise HashError, "you must hash some data first"
189 self.fileHash = self.fileHasher.digest()
192 # Save the last piece hash
194 self.pieceHash.append(self.pieceHasher.digest())
# Body of the method called elsewhere as hexdigest(); its `def` line is not
# visible in this excerpt. It converts digest() to hex with b2a_hex and caches
# the result in self.fileHex. Because it calls digest(), it raises HashError
# under the same conditions. NOTE(review): the return is not visible here.
198 """Get the hash of the added file data in hex format."""
199 if self.fileHex is None:
200 self.fileHex = b2a_hex(self.digest())
# Body of the method called elsewhere as verify(); its `def` line is not
# visible in this excerpt. The comparison runs once, when no result is cached
# and both the computed and expected hashes are available; the outcome is
# stored in self.result. A match requires equal hash AND equal size.
# NOTE(review): the return statement is not visible here.
204 """Verify that the added file data hash matches the expected hash."""
205 if self.result is None and self.fileHash is not None and self.expHash is not None:
206 self.result = (self.fileHash == self.expHash and self.size == self.expSize)
# Docstring of an accessor for the expected hash. NOTE(review): its `def`
# line and body are not visible in this excerpt; the name `expected` is
# inferred from the sibling hexexpected() - confirm.
211 """Get the expected hash."""
# Hex form of the expected hash. self.expHex is filled with
# b2a_hex(self.expHash) only when it is not already set and an expected hash
# exists. NOTE(review): the return statement is not visible in this excerpt.
214 def hexexpected(self):
215 """Get the expected hash in hex format."""
216 if self.expHex is None and self.expHash is not None:
217 self.expHex = b2a_hex(self.expHash)
220 #{ Setting the expected hash
# Records the expected hash and size that later hashing is verified against.
#   hashType - an entry of self.ORDER; list.index() raises ValueError if absent.
#   hashHex  - expected digest as a hex string; a2b_hex raises on invalid hex.
#   size     - expected size; converted with int(), so numeric strings work.
221 def set(self, hashType, hashHex, size):
222 """Initialize the hash object.
224 @param hashType: must be one of the dictionaries from L{ORDER}
226 self.hashTypeNum = self.ORDER.index(hashType) # error if not found
227 self.expHex = hashHex
228 self.expSize = int(size)
# Keep the binary form as well, so verify() can compare raw digests.
229 self.expHash = a2b_hex(self.expHex)
# Sets the expected hash from an index-file record by walking ORDER in
# priority order and looking up each entry's 'AptIndexRecord' key.
# NOTE(review): the condition that guards the set() call, and any
# return/break after it, are not visible in this excerpt.
231 def setFromIndexRecord(self, record):
232 """Set the hash from the cache of index file records.
234 @type record: C{dictionary}
235 @param record: keys are hash types, values are tuples of (hash, size)
237 for hashType in self.ORDER:
238 result = record.get(hashType['AptIndexRecord'], None)
# result[0] is passed as the hex hash and result[1] as the size.
240 self.set(hashType, result[0], result[1])
# Sets the expected hash from an Apt package record by walking ORDER in
# priority order and reading the attribute named by each entry's
# 'AptPkgRecord' key (None if the record lacks it). The size comes from the
# caller. NOTE(review): the condition that guards the set() call, and any
# return/break after it, are not visible in this excerpt.
244 def setFromPkgRecord(self, record, size):
245 """Set the hash from Apt's binary packages cache.
247 @param record: whatever is returned by apt_pkg.GetPkgRecords()
249 for hashType in self.ORDER:
250 hashHex = getattr(record, hashType['AptPkgRecord'], None)
252 self.set(hashType, hashHex, size)
# Sets the expected hash from a source-package record. An ORDER entry is used
# only if its 'AptSrcRecord' value is truthy; record[0] is passed as the hex
# hash and record[1] as the size. NOTE(review): any return/break after the
# set() call is not visible in this excerpt.
256 def setFromSrcRecord(self, record):
257 """Set the hash from Apt's source package records cache.
259 Currently very simple since Apt only tracks MD5 hashes of source files.
261 @type record: (C{string}, C{int}, C{string})
262 @param record: the hash, size and path of the source file
264 for hashType in self.ORDER:
265 if hashType['AptSrcRecord']:
266 self.set(hashType, record[0], record[1])
# twisted.trial test case for the hash object defined earlier in this file.
# NOTE(review): the class attribute set under the version check (and any
# attribute before it) is not visible in this excerpt.
270 class TestHashObject(unittest.TestCase):
271 """Unit tests for the hash objects."""
274 if sys.version_info < (2, 4):
# Misuse test: with only an expected hash set and no hasher created, digest(),
# hexdigest() and update() must each raise HashError.
# NOTE(review): the line that creates `h` is not visible in this excerpt.
277 def test_failure(self):
278 """Tests that the hash object fails when treated badly."""
280 h.set(h.ORDER[0], b2a_hex('12345678901234567890'), '0')
281 self.failUnlessRaises(HashError, h.digest)
282 self.failUnlessRaises(HashError, h.hexdigest)
283 self.failUnlessRaises(HashError, h.update, 'gfgf')
# Feeds 120*1024 ten-character chunks (1,228,800 characters in total) one at
# a time, then checks the whole-file digest and that exactly three piece
# digests with the expected values were produced.
# NOTE(review): the lines that create `h` and start hashing are not visible
# in this excerpt.
285 def test_pieces(self):
286 """Tests updating of pieces a little at a time."""
289 for i in xrange(120*1024):
290 h.update('1234567890')
291 pieces = h.pieceDigests()
292 self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5')
293 self.failUnless(len(pieces) == 3)
294 self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
295 self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
296 self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')
# Same 1,228,800-character input as test_pieces, passed in a single update()
# call, so the multi-piece loop inside update() is exercised. The expected
# file and piece digests are identical to test_pieces.
# NOTE(review): the lines that create `h` and start hashing are not visible
# in this excerpt.
298 def test_pieces_at_once(self):
299 """Tests the updating of multiple pieces at once."""
302 h.update('1234567890'*120*1024)
303 self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5')
304 pieces = h.pieceDigests()
305 self.failUnless(len(pieces) == 3)
306 self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
307 self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
308 self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')
# Feeds the same overall input in chunks intended to end exactly on piece
# boundaries, then checks total size, file digest and the three piece digests.
# NOTE(review): the lines that create `h`, and presumably short top-up
# update() calls before each `% PIECE_SIZE` assertion, are not visible in
# this excerpt. The visible chunks are 524,280 + 524,280 + 180,220 characters,
# which is less than the asserted total of 1,228,800.
310 def test_pieces_boundaries(self):
311 """Tests the updating exactly to piece boundaries."""
314 h.update('1234567890'*52428)
316 assert h.size % PIECE_SIZE == 0
318 h.update('1234567890'*52428)
320 assert h.size % PIECE_SIZE == 0
322 h.update('1234567890'*18022)
323 assert h.size == 10*120*1024
324 pieces = h.pieceDigests()
325 self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5')
326 self.failUnless(len(pieces) == 3)
327 self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
328 self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
329 self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')
# Checks that piece digests do not depend on the file hash type: a non-sha1
# entry of ORDER is selected, the same input as test_pieces is fed in, and the
# same three piece digests are expected. The file digest is not checked.
# NOTE(review): the lines that create `h`, leave the selection loop and start
# hashing are not visible in this excerpt.
331 def test_pieces_other_hashes(self):
332 """Tests updating of pieces a little at a time."""
334 for hashType in h.ORDER:
335 if hashType['name'] != 'sha1':
336 h.hashTypeNum = h.ORDER.index(hashType)
338 assert h.ORDER[h.hashTypeNum]['name'] != 'sha1'
340 for i in xrange(120*1024):
341 h.update('1234567890')
342 pieces = h.pieceDigests()
343 self.failUnless(len(pieces) == 3)
344 self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
345 self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
346 self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')
# Body of the SHA1 round-trip test; its `def` line is not visible in this
# excerpt (name inferred from the sibling test_sha256 - confirm). It finds the
# 'sha1' entry in ORDER, sets the expected hash and size ('19' is the length
# of the input string), hashes the string, and checks the hex digest. It then
# checks that a further update() raises HashError and that verify() succeeds.
# NOTE(review): the lines that create `h`, set `found` and start hashing are
# not visible here.
349 """Test hashing using the SHA1 hash."""
352 for hashType in h.ORDER:
353 if hashType['name'] == 'sha1':
356 self.failUnless(found == True)
357 h.set(hashType, '3bba0a5d97b7946ad2632002bf9caefe2cb18e00', '19')
359 h.update('apt-p2p is the best')
360 self.failUnless(h.hexdigest() == '3bba0a5d97b7946ad2632002bf9caefe2cb18e00')
361 self.failUnlessRaises(HashError, h.update, 'gfgf')
362 self.failUnless(h.verify() == True)
# Body of the MD5 round-trip test; its `def` line is not visible in this
# excerpt (name inferred from the sibling test_sha256 - confirm). It follows
# the same steps as the SHA1 test, using the 'md5' entry of ORDER and the MD5
# hex digest of the same 19-character string.
# NOTE(review): the lines that create `h`, set `found` and start hashing are
# not visible here.
365 """Test hashing using the MD5 hash."""
368 for hashType in h.ORDER:
369 if hashType['name'] == 'md5':
372 self.failUnless(found == True)
373 h.set(hashType, '6b5abdd30d7ed80edd229f9071d8c23c', '19')
375 h.update('apt-p2p is the best')
376 self.failUnless(h.hexdigest() == '6b5abdd30d7ed80edd229f9071d8c23c')
377 self.failUnlessRaises(HashError, h.update, 'gfgf')
378 self.failUnless(h.verify() == True)
# SHA256 round-trip test: finds the 'sha256' entry in ORDER, sets the expected
# hash and size, hashes the 19-character string, and checks the hex digest.
# It then checks that a further update() raises HashError and that verify()
# succeeds.
# NOTE(review): the lines that create `h`, set `found` and start hashing are
# not visible in this excerpt.
380 def test_sha256(self):
381 """Test hashing using the SHA256 hash."""
384 for hashType in h.ORDER:
385 if hashType['name'] == 'sha256':
388 self.failUnless(found == True)
389 h.set(hashType, '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7', '19')
391 h.update('apt-p2p is the best')
392 self.failUnless(h.hexdigest() == '47f2238a30a0340faa2bf01a9bdc42ba77b07b411cda1e24cd8d7b5c4b7d82a7')
393 self.failUnlessRaises(HashError, h.update, 'gfgf')
394 self.failUnless(h.verify() == True)
# On interpreters older than Python 2.5 a `skip` attribute is set on
# test_sha256, matching the removal of sha256 from ORDER in __init__. Trial is
# expected to treat that attribute as a skip reason - confirm against the
# twisted.trial documentation.
396 if sys.version_info < (2, 5):
397 test_sha256.skip = "SHA256 hashes are not supported by Python until version 2.5"