2 """Functions for bencoding and bdecoding data.
4 @type decode_func: C{dictionary} of C{function}
5 @var decode_func: a dictionary of function calls to be made, based on data,
6 the keys are the first character of the data and the value is the
7 function to use to decode that data
8 @type bencached_marker: C{list}
9 @var bencached_marker: mutable type to ensure class origination
10 @type encode_func: C{dictionary} of C{function}
11 @var encode_func: a dictionary of function calls to be made, based on data,
12 the keys are the type of the data and the value is the
13 function to use to encode that data
14 @type BencachedType: C{type}
15 @var BencachedType: the L{Bencached} type
18 from types import IntType, LongType, StringType, ListType, TupleType, DictType, BooleanType
20 from types import UnicodeType
23 from datetime import datetime
26 from twisted.python import log
27 from twisted.trial import unittest
29 class BencodeError(ValueError):
33 """Bdecode an integer.
36 @param x: the data to decode
38 @param f: the offset in the data to start at
39 @rtype: C{int}, C{int}
40 @return: the bdecoded integer, and the offset to read next
41 @raise BencodeError: if the data is improperly encoded
46 newf = x.index('e', f)
53 raise BencodeError, "integer has a leading zero after a negative sign"
54 elif x[f] == '0' and newf != f+1:
55 raise BencodeError, "integer has a leading zero"
58 def decode_string(x, f):
62 @param x: the data to decode
64 @param f: the offset in the data to start at
65 @rtype: C{string}, C{int}
66 @return: the bdecoded string, and the offset to read next
67 @raise BencodeError: if the data is improperly encoded
71 colon = x.index(':', f)
74 except (OverflowError, ValueError):
76 if x[f] == '0' and colon != f+1:
77 raise BencodeError, "string length has a leading zero"
79 return (x[colon:colon+n], colon+n)
def decode_unicode(x, f):
    """Bdecode a unicode string.

    The character at offset f is the type marker (this function is registered
    under C{'u'} in L{decode_func}); it is skipped, and what follows is read
    as an ordinary bencoded string whose bytes are then decoded as UTF-8.

    @type x: C{string}
    @param x: the data to decode
    @type f: C{int}
    @param f: the offset in the data to start at
    @rtype: C{unicode}, C{int}
    @return: the bdecoded unicode string, and the offset to read next

    """

    # Step over the one-character marker before reading the string payload.
    raw, next_offset = decode_string(x, f + 1)
    text = raw.decode('UTF-8')
    return (text, next_offset)
96 def decode_datetime(x, f):
97 """Bdecode a datetime value.
100 @param x: the data to decode
102 @param f: the offset in the data to start at
103 @rtype: C{datetime.datetime}, C{int}
104 @return: the bdecoded integer, and the offset to read next
105 @raise BencodeError: if the data is improperly encoded
110 newf = x.index('e', f)
112 date = datetime(*(time.strptime(x[f:newf], '%Y-%m-%dT%H:%M:%S')[0:6]))
114 raise BencodeError, "datetime value could not be decoded: %s" % x[f:newf]
115 return (date, newf+1)
117 def decode_list(x, f):
121 @param x: the data to decode
123 @param f: the offset in the data to start at
124 @rtype: C{list}, C{int}
125 @return: the bdecoded list, and the offset to read next
131 v, f = decode_func[x[f]](x, f)
135 def decode_dict(x, f):
136 """Bdecode a dictionary.
139 @param x: the data to decode
141 @param f: the offset in the data to start at
142 @rtype: C{dictionary}, C{int}
143 @return: the bdecoded dictionary, and the offset to read next
144 @raise BencodeError: if the data is improperly encoded
151 k, f = decode_string(x, f)
153 raise BencodeError, "dictionary keys must be in sorted order"
155 r[k], f = decode_func[x[f]](x, f)
# Values other than plain strings are recognised by a one-letter prefix.
decode_func.update({
    'l': decode_list,
    'd': decode_dict,
    'i': decode_int,
    'u': decode_unicode,
    't': decode_datetime,
})
# A plain string starts with its decimal length, so every digit character
# dispatches to the string decoder.
decode_func.update(dict.fromkeys('0123456789', decode_string))
175 def bdecode(x, sloppy = 0):
176 """Bdecode a string of data.
179 @param x: the data to decode
180 @type sloppy: C{boolean}
181 @param sloppy: whether to allow errors in the decoding
183 @return: the bdecoded data
184 @raise BencodeError: if the data is improperly encoded
189 r, l = decode_func[x[0]](x, 0)
190 # except (IndexError, KeyError):
191 except (IndexError, KeyError, ValueError):
192 raise BencodeError, "bad bencoded data"
193 if not sloppy and l != len(x):
194 raise BencodeError, "bad bencoded data, all could not be decoded"
197 bencached_marker = []
200 """Dummy data structure for storing bencoded data in memory.
202 @type marker: C{list}
203 @ivar marker: mutable type to make sure the data was encoded by this class
204 @type bencoded: C{string}
205 @ivar bencoded: the bencoded data stored in a string
209 def __init__(self, s):
213 @param s: the new bencoded data to store
217 self.marker = bencached_marker
220 BencachedType = type(Bencached('')) # insufficient, but good as a filter
222 def encode_bencached(x,r):
223 """Bencode L{Bencached} data.
225 @type x: L{Bencached}
226 @param x: the data to encode
228 @param r: the currently bencoded data, to which the bencoding of x
233 assert x.marker == bencached_marker
237 """Bencode an integer.
240 @param x: the data to encode
242 @param r: the currently bencoded data, to which the bencoding of x
247 r.extend(('i',str(x),'e'))
249 def encode_bool(x,r):
250 """Bencode a boolean.
253 @param x: the data to encode
255 @param r: the currently bencoded data, to which the bencoding of x
def encode_string(x,r):
    """Bencode a string.

    Three pieces are added to r: the decimal length of x, a colon, and x
    itself.

    @param x: the data to encode
    @param r: the currently bencoded data, to which the bencoding of x
        is appended (via C{extend})

    """

    length_prefix = str(len(x))
    r.extend((length_prefix, ':', x))
275 def encode_unicode(x,r):
276 """Bencode a unicode string.
279 @param x: the data to encode
281 @param r: the currently bencoded data, to which the bencoding of x
287 encode_string(x.encode('UTF-8'),r)
def encode_datetime(x,r):
    """Bencode a datetime value in UTC.

    If the datetime object has time zone info, it is converted to UTC time.
    Otherwise it is assumed that the time is already in UTC time.
    Microseconds are removed.

    The pieces added to r are C{'t'}, the ISO 8601 timestamp without any
    time zone suffix (C{YYYY-MM-DDTHH:MM:SS}), and C{'e'}.

    @type x: C{datetime.datetime}
    @param x: the data to encode
    @param r: the currently bencoded data, to which the bencoding of x
        is appended (via C{extend})

    """

    date = x.replace(microsecond = 0)
    offset = date.utcoffset()
    if offset is not None:
        # utcoffset() is (local time - UTC), so UTC is the local time MINUS
        # the offset.  tzinfo is dropped so that isoformat() emits no
        # "+HH:MM" suffix, which the decoder's fixed format would reject.
        utcdate = date.replace(tzinfo = None) - offset
    else:
        # Naive values are taken to be UTC already.
        utcdate = date
    r.extend(('t',utcdate.isoformat(),'e'))
312 def encode_list(x,r):
316 @param x: the data to encode
318 @param r: the currently bencoded data, to which the bencoding of x
325 encode_func[type(e)](e, r)
328 def encode_dict(x,r):
329 """Bencode a dictionary.
331 @type x: C{dictionary}
332 @param x: the data to encode
334 @param r: the currently bencoded data, to which the bencoding of x
343 r.extend((str(len(k)),':',k))
344 encode_func[type(v)](v, r)
# Map each encodable Python type to the function that bencodes it.  Lists
# and tuples share one encoder, as do plain and long integers.
encode_func.update({
    BencachedType: encode_bencached,
    IntType: encode_int,
    LongType: encode_int,
    StringType: encode_string,
    ListType: encode_list,
    TupleType: encode_list,
    DictType: encode_dict,
    BooleanType: encode_bool,
    datetime: encode_datetime,
})
358 encode_func[UnicodeType] = encode_unicode
361 """Bencode some data.
364 @param x: the data to encode
366 @return: the bencoded data
367 @raise BencodeError: if the data contains a type that cannot be encoded
372 encode_func[type(x)](x, r)
374 raise BencodeError, "failed to bencode the data"
377 class TestBencode(unittest.TestCase):
378 """Test the bencoding and bdecoding of data."""
def test_bdecode_string(self):
    """Strings bdecode to their payload; malformed input is rejected."""
    # Each entry pairs the encoded input with either the expected value or
    # BencodeError when decoding must fail.  Order matches the checks made.
    cases = (
        ('0:0:', BencodeError),
        ('', BencodeError),
        ('35208734823ljdahflajhdf', BencodeError),
        ('2:abfdjslhfld', BencodeError),
        ('0:', ''),
        ('3:abc', 'abc'),
        ('10:1234567890', '1234567890'),
        ('02:xy', BencodeError),
        ('9999:x', BencodeError),
    )
    for encoded, outcome in cases:
        if outcome is BencodeError:
            self.failUnlessRaises(BencodeError, bdecode, encoded)
        else:
            self.failUnlessEqual(bdecode(encoded), outcome)
def test_bdecode_int(self):
    """Integers bdecode to their value; malformed integers are rejected."""
    # Each entry pairs the encoded input with either the expected value or
    # BencodeError when decoding must fail.  Order matches the checks made.
    cases = (
        ('ie', BencodeError),
        ('i341foo382e', BencodeError),
        ('i4e', 4),
        ('i0e', 0),
        ('i123456789e', 123456789),
        ('i-10e', -10),
        ('i-0e', BencodeError),
        ('i123', BencodeError),
        ('i6easd', BencodeError),
        ('i03e', BencodeError),
    )
    for encoded, outcome in cases:
        if outcome is BencodeError:
            self.failUnlessRaises(BencodeError, bdecode, encoded)
        else:
            self.failUnlessEqual(bdecode(encoded), outcome)
def test_bdecode_list(self):
    """Lists bdecode element by element; malformed lists are rejected."""
    # Each entry pairs the encoded input with either the expected value or
    # BencodeError when decoding must fail.  Order matches the checks made.
    cases = (
        ('l', BencodeError),
        ('le', []),
        ('leanfdldjfh', BencodeError),
        ('l0:0:0:e', ['', '', '']),
        ('relwjhrlewjh', BencodeError),
        ('li1ei2ei3ee', [1, 2, 3]),
        ('l3:asd2:xye', ['asd', 'xy']),
        ('ll5:Alice3:Bobeli2ei3eee', [['Alice', 'Bob'], [2, 3]]),
        ('l01:ae', BencodeError),
        ('l0:', BencodeError),
    )
    for encoded, outcome in cases:
        if outcome is BencodeError:
            self.failUnlessRaises(BencodeError, bdecode, encoded)
        else:
            self.failUnlessEqual(bdecode(encoded), outcome)
def test_bdecode_dict(self):
    """Dictionaries bdecode to dicts; malformed dictionaries are rejected."""
    # Each entry pairs the encoded input with either the expected value or
    # BencodeError when decoding must fail.  Order matches the checks made.
    nested = {'spam.mp3': {'author': 'Alice', 'length': 100000}}
    cases = (
        ('d', BencodeError),
        ('defoobar', BencodeError),
        ('de', {}),
        ('d3:agei25e4:eyes4:bluee', {'age': 25, 'eyes': 'blue'}),
        ('d8:spam.mp3d6:author5:Alice6:lengthi100000eee', nested),
        ('d3:fooe', BencodeError),
        ('di1e0:e', BencodeError),
        ('d1:b0:1:a0:e', BencodeError),
        ('d1:a0:1:a0:e', BencodeError),
        ('d0:0:', BencodeError),
        ('d0:', BencodeError),
    )
    for encoded, outcome in cases:
        if outcome is BencodeError:
            self.failUnlessRaises(BencodeError, bdecode, encoded)
        else:
            self.failUnlessEqual(bdecode(encoded), outcome)
def test_bdecode_unicode(self):
    """'u'-prefixed strings bdecode to text; malformed ones are rejected."""
    # Each entry pairs the encoded input with either the expected value or
    # BencodeError when decoding must fail.  Order matches the checks made.
    cases = (
        ('u0:0:', BencodeError),
        ('u', BencodeError),
        ('u35208734823ljdahflajhdf', BencodeError),
        ('u2:abfdjslhfld', BencodeError),
        ('u0:', ''),
        ('u3:abc', 'abc'),
        ('u10:1234567890', '1234567890'),
        ('u02:xy', BencodeError),
        ('u9999:x', BencodeError),
    )
    for encoded, outcome in cases:
        if outcome is BencodeError:
            self.failUnlessRaises(BencodeError, bdecode, encoded)
        else:
            self.failUnlessEqual(bdecode(encoded), outcome)
def test_bencode_int(self):
    """Integers, including one too large for a machine word, bencode as i<n>e."""
    expectations = (
        (4, 'i4e'),
        (0, 'i0e'),
        (-10, 'i-10e'),
        (12345678901234567890, 'i12345678901234567890e'),
    )
    for value, encoded in expectations:
        self.failUnlessEqual(bencode(value), encoded)
def test_bencode_string(self):
    """Strings bencode as <length>:<payload>, including the empty string."""
    expectations = (
        ('', '0:'),
        ('abc', '3:abc'),
        ('1234567890', '10:1234567890'),
    )
    for value, encoded in expectations:
        self.failUnlessEqual(bencode(value), encoded)
def test_bencode_list(self):
    """Empty, flat and nested lists bencode as l...e."""
    expectations = (
        ([], 'le'),
        ([1, 2, 3], 'li1ei2ei3ee'),
        ([['Alice', 'Bob'], [2, 3]], 'll5:Alice3:Bobeli2ei3eee'),
    )
    for value, encoded in expectations:
        self.failUnlessEqual(bencode(value), encoded)
def test_bencode_dict(self):
    """Dictionaries bencode as d...e; a non-string key is an error."""
    nested = {'spam.mp3': {'author': 'Alice', 'length': 100000}}
    expectations = (
        ({}, 'de'),
        ({'age': 25, 'eyes': 'blue'}, 'd3:agei25e4:eyes4:bluee'),
        (nested, 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee'),
    )
    for value, encoded in expectations:
        self.failUnlessEqual(bencode(value), encoded)
    # An integer key cannot be bencoded.
    self.failUnlessRaises(BencodeError, bencode, {1: 'foo'})
def test_bencode_unicode(self):
    """Unicode text bencodes to the same output as the equivalent string."""
    expectations = (
        (u'', '0:'),
        (u'abc', '3:abc'),
        (u'1234567890', '10:1234567890'),
    )
    for value, encoded in expectations:
        self.failUnlessEqual(bencode(value), encoded)
471 self.failUnless(bdecode(bencode(True)))
472 self.failIf(bdecode(bencode(False)))
def test_datetime(self):
    """A naive UTC datetime survives a bencode/bdecode round trip, minus microseconds."""
    now = datetime.utcnow()
    round_tripped = bdecode(bencode(now))
    # The encoding drops microseconds, so compare against the truncated value.
    truncated = now.replace(microsecond = 0)
    self.failUnlessEqual(round_tripped, truncated)
# When UnicodeType is None, mark both unicode tests with a `skip` reason
# instead of letting them run.
# NOTE(review): presumably consumed by the trial runner's `skip` attribute
# convention -- confirm against the Twisted version in use.
if UnicodeType is None:
    test_bencode_unicode.skip = "Python was not compiled with unicode support"
    test_bdecode_unicode.skip = "Python was not compiled with unicode support"