from binascii import b2a_hex, a2b_hex
import sys

from twisted.internet import threads, defer
from twisted.trial import unittest
class HashError(ValueError):
    """An error has occurred while hashing a file.

    Used in this module for invalid hashing requests (no hasher
    initialised, data added after the digest was taken, no length given
    for normalisation) and for a missing file.
    """
# NOTE(review): this class was restored from a listing that had lost its
# indentation and a number of lines (the class header, some ``def`` lines and
# ``return`` statements). The restored lines are inferred from how the rest
# of the file uses them -- confirm against version control.
class HashObject:
    """Manages hashes and hashing for a file.

    An instance tracks the hash a file is expected to have (see L{set} and
    the C{setFrom*} methods) and the hash of the data actually added with
    L{update}. Added data is also hashed piece by piece, each piece being
    C{PIECE_SIZE} long, so the individual piece digests can be retrieved.

    @cvar ORDER: the priority ordering of hashes, and how to extract them
    """

    # The priority ordering of hashes, and how to extract them.
    # 'old_module' names the pre-hashlib module used on Python < 2.5; the
    # sha256 entry has none because it is removed on those versions.
    ORDER = [ {'name': 'sha1',
               'AptPkgRecord': 'SHA1Hash',
               'AptSrcRecord': False,
               'AptIndexRecord': 'SHA1',
               'old_module': 'sha',
               'hashlib_func': 'sha1',
               },
              {'name': 'sha256',
               'AptPkgRecord': 'SHA256Hash',
               'AptSrcRecord': False,
               'AptIndexRecord': 'SHA256',
               'hashlib_func': 'sha256',
               },
              {'name': 'md5',
               'AptPkgRecord': 'MD5Hash',
               'AptSrcRecord': True,
               'AptIndexRecord': 'MD5SUM',
               'old_module': 'md5',
               'hashlib_func': 'md5',
               },
            ]

    def __init__(self, digest = None, size = None):
        """Initialize the hash object.

        @param digest: an already known binary hash of the file (optional)
        @param size: the already known size of the hashed data (optional)
        """
        self.hashTypeNum = 0    # Use the first if nothing else matters
        self.expHash = None
        self.expHex = None
        self.expSize = None
        self.expNormHash = None
        self.fileHasher = None
        self.pieceHasher = None
        self.fileHash = digest
        self.pieceHash = []
        self.size = size
        self.fileHex = None
        self.fileNormHash = None
        # Starts out "done" so that update() fails until new() is called
        self.done = True
        self.result = None
        if sys.version_info < (2, 5):
            # sha256 is not available in python before 2.5, remove it
            # (ORDER is shared by all instances, so this happens only once)
            for hashType in self.ORDER:
                if hashType['name'] == 'sha256':
                    self.ORDER.remove(hashType)
                    break

    def _norm_hash(self, hashString, bits=None, bytes=None):
        """Pad with NUL bytes or truncate a binary hash to a fixed length.

        @param hashString: the binary hash to normalize
        @param bits: the desired length in bits (rounded up to whole bytes)
        @param bytes: the desired length in bytes, used when bits is not given
        @return: the hash, exactly the requested number of bytes long
        @raise HashError: if neither bits nor bytes is specified
        """
        if bits is not None:
            bytes = (bits - 1) // 8 + 1
        elif bytes is None:
            raise HashError("you must specify one of bits or bytes")

        missing = bytes - len(hashString)
        if missing > 0:
            padding = '\000' * missing
            if not isinstance(hashString, str):
                # Digests are not str on Python 3; pad with a matching type
                padding = padding.encode('ascii')
            hashString = hashString + padding
        elif missing < 0:
            hashString = hashString[:bytes]
        return hashString

    #### Methods for returning the expected hash
    def expected(self):
        """Get the expected hash."""
        return self.expHash

    def hexexpected(self):
        """Get the expected hash in hex format."""
        if self.expHex is None and self.expHash is not None:
            self.expHex = b2a_hex(self.expHash)
        return self.expHex

    def normexpected(self, bits=None, bytes=None):
        """Normalize the binary expected hash for the given length.

        You must specify one of bits or bytes. The result is cached, so
        later calls return the first normalization regardless of arguments.

        @raise HashError: if neither bits nor bytes is specified
        """
        if self.expNormHash is None and self.expHash is not None:
            self.expNormHash = self._norm_hash(self.expHash, bits, bytes)
        return self.expNormHash

    #### Methods for hashing data
    def new(self, force = False):
        """Generate a new hashing object suitable for hashing a file.

        Does nothing if a verification result already exists, unless forced.

        @param force: set to True to force creating a new hasher even if
            the hash has been verified already
        """
        if self.result is None or force:
            self.result = None
            self.done = False
            self.fileHasher = self._new()
            self.pieceHasher = None
            self.fileHash = None
            self.pieceHash = []
            self.size = 0
            self.fileHex = None
            self.fileNormHash = None

    def _new(self):
        """Create a new hashing object according to the hash type."""
        hashType = self.ORDER[self.hashTypeNum]
        if sys.version_info < (2, 5):
            # No hashlib before 2.5, fall back to the stand-alone modules
            mod = __import__(hashType['old_module'], globals(), locals(), [])
            return mod.new()
        import hashlib
        func = getattr(hashlib, hashType['hashlib_func'])
        return func()

    def update(self, data):
        """Add more data to the file hasher.

        The data is silently ignored if a verification result already exists.
        C{PIECE_SIZE} is expected to be defined at module level.

        @param data: the next chunk of the file's contents
        @raise HashError: if the digest has already been taken (or L{new}
            was never called), or if no file hasher exists
        """
        if self.result is None:
            if self.done:
                raise HashError("Already done, you can't add more data after calling digest() or verify()")
            if self.fileHasher is None:
                raise HashError("file hasher not initialized")

            if not self.pieceHasher and self.size + len(data) > PIECE_SIZE:
                # Hash up to the piece size
                self.fileHasher.update(data[:(PIECE_SIZE - self.size)])
                data = data[(PIECE_SIZE - self.size):]
                self.size = PIECE_SIZE

                # Save the first piece digest and initialize a new piece hasher
                # (up to here the file hash and first piece hash are the same)
                self.pieceHash.append(self.fileHasher.digest())
                self.pieceHasher = self._new()

            if self.pieceHasher:
                # Loop in case the data contains multiple pieces
                piece_size = self.size % PIECE_SIZE
                while piece_size + len(data) > PIECE_SIZE:
                    # Save the piece hash and start a new one
                    self.pieceHasher.update(data[:(PIECE_SIZE - piece_size)])
                    self.pieceHash.append(self.pieceHasher.digest())
                    self.pieceHasher = self._new()

                    # Don't forget to hash the data normally
                    self.fileHasher.update(data[:(PIECE_SIZE - piece_size)])
                    data = data[(PIECE_SIZE - piece_size):]
                    self.size += PIECE_SIZE - piece_size
                    piece_size = self.size % PIECE_SIZE

                # Hash any remaining data
                self.pieceHasher.update(data)

            self.fileHasher.update(data)
            self.size += len(data)

    def pieceDigests(self):
        """Get the piece hashes of the added file data.

        Finalizes the file hash first, so no more data can be added.
        """
        self.digest()
        return self.pieceHash

    def digest(self):
        """Get the hash of the added file data.

        The first call finalizes hashing; later calls return the same value.

        @raise HashError: if there is no file hash and no hasher to get it from
        """
        if self.fileHash is None:
            if self.fileHasher is None:
                raise HashError("you must hash some data first")
            self.fileHash = self.fileHasher.digest()
            self.done = True

            # Save the last piece hash
            if self.pieceHasher:
                self.pieceHash.append(self.pieceHasher.digest())
            else:
                # If there are no piece hashes, then the file hash is the only piece hash
                self.pieceHash.append(self.fileHash)
        return self.fileHash

    def hexdigest(self):
        """Get the hash of the added file data in hex format."""
        if self.fileHex is None:
            self.fileHex = b2a_hex(self.digest())
        return self.fileHex

    def norm(self, bits=None, bytes=None):
        """Normalize the binary file hash for the given length.

        You must specify one of bits or bytes. The result is cached, so
        later calls return the first normalization regardless of arguments.

        @raise HashError: if neither bits nor bytes is specified
        """
        if self.fileNormHash is None:
            self.fileNormHash = self._norm_hash(self.digest(), bits, bytes)
        return self.fileNormHash

    def verify(self):
        """Verify that the added file data hash matches the expected hash.

        @return: True or False once both the file hash and the expected hash
            are known (both hash and size must match), otherwise None
        """
        if self.result is None and self.fileHash is not None and self.expHash is not None:
            self.result = (self.fileHash == self.expHash and self.size == self.expSize)
        return self.result

    def hashInThread(self, file):
        """Hashes a file in a separate thread, callback with the result.

        @param file: the file to hash; presumably a twisted FilePath, as it
            must provide exists() and open() -- TODO confirm
        @return: a deferred that fires with this object once hashing is
            done, or fails with a L{HashError} if the file does not exist
        """
        # NOTE(review): the statement before the exists() check was lost in
        # extraction; restat(False) is assumed here -- confirm.
        file.restat(False)
        if not file.exists():
            df = defer.Deferred()
            df.errback(HashError("file not found"))
            return df

        df = threads.deferToThread(self._hashInThread, file)
        return df

    def _hashInThread(self, file):
        """Hashes a file, returning itself as the result."""
        # NOTE(review): apart from the new() call, this body was lost in
        # extraction; the 4096 read size is an assumption -- confirm.
        f = file.open()
        try:
            self.new(force = True)
            data = f.read(4096)
            while data:
                self.update(data)
                data = f.read(4096)
            self.digest()
        finally:
            # Release the file handle even if reading or hashing fails
            f.close()
        return self

    #### Methods for setting the expected hash
    def set(self, hashType, hashHex, size):
        """Initialize the hash object with the expected hash and size.

        @param hashType: must be one of the dictionaries from L{ORDER}
        @param hashHex: the expected hash, in hex format
        @param size: the expected size, anything accepted by int()
        @raise ValueError: if hashType is not one of the entries of L{ORDER}
        """
        self.hashTypeNum = self.ORDER.index(hashType)    # error if not found
        self.expHex = hashHex
        self.expSize = int(size)
        self.expHash = a2b_hex(self.expHex)

    def setFromIndexRecord(self, record):
        """Set the hash from the cache of index file records.

        The first hash type in L{ORDER} present in the record is used.

        @type record: C{dictionary}
        @param record: keys are hash types, values are tuples of (hash, size)
        @return: True if a usable hash was found, otherwise False
        """
        for hashType in self.ORDER:
            result = record.get(hashType['AptIndexRecord'], None)
            if result:
                self.set(hashType, result[0], result[1])
                return True
        return False

    def setFromPkgRecord(self, record, size):
        """Set the hash from Apt's binary packages cache.

        The first hash type in L{ORDER} available on the record is used.

        @param record: whatever is returned by apt_pkg.GetPkgRecords()
        @param size: the expected size of the file
        @return: True if a usable hash was found, otherwise False
        """
        for hashType in self.ORDER:
            hashHex = getattr(record, hashType['AptPkgRecord'], None)
            if hashHex:
                self.set(hashType, hashHex, size)
                return True
        return False

    def setFromSrcRecord(self, record):
        """Set the hash from Apt's source package records cache.

        Currently very simple since Apt only tracks MD5 hashes of source files.

        @type record: (C{string}, C{int}, C{string})
        @param record: the hash, size and path of the source file
        @return: True if a hash type usable for source records exists,
            otherwise False
        """
        for hashType in self.ORDER:
            if hashType['AptSrcRecord']:
                self.set(hashType, record[0], record[1])
                return True
        return False
# NOTE(review): this class was restored from a listing that had lost its
# indentation and several lines (test ``def`` lines, ``h = HashObject()`` /
# ``h.new()`` set-up lines and the ``found`` bookkeeping). The restored lines
# are inferred from the surviving assertions -- confirm against version
# control.
class TestHashObject(unittest.TestCase):
    """Unit tests for the hash objects."""

    # NOTE(review): the timeout value and the skip message below were lost in
    # extraction and are assumptions -- confirm.
    timeout = 5
    if sys.version_info < (2, 4):
        skip = "skippingme"

    def test_normalize(self):
        """Expected hashes are padded or truncated to the requested length."""
        # A fresh object is needed each time: normexpected() caches its result
        h = HashObject()
        h.set(h.ORDER[0], b2a_hex('12345678901234567890'), '0')
        self.failUnless(h.normexpected(bits = 160) == '12345678901234567890')
        h = HashObject()
        h.set(h.ORDER[0], b2a_hex('12345678901234567'), '0')
        self.failUnless(h.normexpected(bits = 160) == '12345678901234567\000\000\000')
        h = HashObject()
        h.set(h.ORDER[0], b2a_hex('1234567890123456789012345'), '0')
        self.failUnless(h.normexpected(bytes = 20) == '12345678901234567890')
        h = HashObject()
        h.set(h.ORDER[0], b2a_hex('1234567890123456789'), '0')
        self.failUnless(h.normexpected(bytes = 20) == '1234567890123456789\000')
        h = HashObject()
        h.set(h.ORDER[0], b2a_hex('123456789012345678901'), '0')
        self.failUnless(h.normexpected(bits = 160) == '12345678901234567890')

    def test_failure(self):
        """The hash object raises HashError when it is used incorrectly."""
        h = HashObject()
        h.set(h.ORDER[0], b2a_hex('12345678901234567890'), '0')
        self.failUnlessRaises(HashError, h.normexpected)
        self.failUnlessRaises(HashError, h.digest)
        self.failUnlessRaises(HashError, h.hexdigest)
        self.failUnlessRaises(HashError, h.update, 'gfgf')

    def test_pieces(self):
        """Piece hashes are the same whether data arrives at once or in bits."""
        h = HashObject()
        h.new()
        h.update('1234567890'*120*1024)
        self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5')
        pieces = h.pieceDigests()
        self.failUnless(len(pieces) == 3)
        self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
        self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
        self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')
        # Start over and feed the same data a little at a time
        h.new(True)
        for i in xrange(120*1024):
            h.update('1234567890')
        pieces = h.pieceDigests()
        self.failUnless(h.digest() == '1(j\xd2q\x0b\n\x91\xd2\x13\x90\x15\xa3E\xcc\xb0\x8d.\xc3\xc5')
        self.failUnless(len(pieces) == 3)
        self.failUnless(pieces[0] == ',G \xd8\xbbPl\xf1\xa3\xa0\x0cW\n\xe6\xe6a\xc9\x95/\xe5')
        self.failUnless(pieces[1] == '\xf6V\xeb/\xa8\xad[\x07Z\xf9\x87\xa4\xf5w\xdf\xe1|\x00\x8e\x93')
        self.failUnless(pieces[2] == 'M[\xbf\xee\xaa+\x19\xbaV\xf699\r\x17o\xcb\x8e\xcfP\x19')

    def test_sha1(self):
        """Hashing and verifying with the SHA1 hash type."""
        h = HashObject()
        found = False
        for hashType in h.ORDER:
            if hashType['name'] == 'sha1':
                found = True
                break
        self.failUnless(found == True)
        h.set(hashType, 'c722df87e1acaa64b27aac4e174077afc3623540', '19')
        h.new()
        h.update('apt-dht is the best')
        self.failUnless(h.hexdigest() == 'c722df87e1acaa64b27aac4e174077afc3623540')
        self.failUnlessRaises(HashError, h.update, 'gfgf')
        self.failUnless(h.verify() == True)

    def test_md5(self):
        """Hashing and verifying with the MD5 hash type."""
        h = HashObject()
        found = False
        for hashType in h.ORDER:
            if hashType['name'] == 'md5':
                found = True
                break
        self.failUnless(found == True)
        h.set(hashType, '2a586bcd1befc5082c872dcd96a01403', '19')
        h.new()
        h.update('apt-dht is the best')
        self.failUnless(h.hexdigest() == '2a586bcd1befc5082c872dcd96a01403')
        self.failUnlessRaises(HashError, h.update, 'gfgf')
        self.failUnless(h.verify() == True)

    def test_sha256(self):
        """Hashing and verifying with the SHA256 hash type."""
        h = HashObject()
        found = False
        for hashType in h.ORDER:
            if hashType['name'] == 'sha256':
                found = True
                break
        self.failUnless(found == True)
        h.set(hashType, '55b971f64d9772f733de03f23db39224f51a455cc5ad4c2db9d5740d2ab259a7', '19')
        h.new()
        h.update('apt-dht is the best')
        self.failUnless(h.hexdigest() == '55b971f64d9772f733de03f23db39224f51a455cc5ad4c2db9d5740d2ab259a7')
        self.failUnlessRaises(HashError, h.update, 'gfgf')
        self.failUnless(h.verify() == True)

    if sys.version_info < (2, 5):
        test_sha256.skip = "SHA256 hashes are not supported by Python until version 2.5"