2 """Functions for bencoding and bdecoding data.
4 @type decode_func: C{dictionary} of C{function}
5 @var decode_func: a dictionary of function calls to be made, based on data,
6 the keys are the first character of the data and the value is the
7 function to use to decode that data
8 @type bencached_marker: C{list}
9 @var bencached_marker: mutable type to ensure class origination
10 @type encode_func: C{dictionary} of C{function}
11 @var encode_func: a dictionary of function calls to be made, based on data,
12 the keys are the type of the data and the value is the
13 function to use to encode that data
14 @type BencachedType: C{type}
15 @var BencachedType: the L{Bencached} type
18 from types import IntType, LongType, StringType, ListType, TupleType, DictType, BooleanType
20 from types import UnicodeType
24 from twisted.python import log
25 from twisted.trial import unittest
28 """Bdecode an integer.
31 @param x: the data to decode
33 @param f: the offset in the data to start at
34 @rtype: C{int}, C{int}
35 @return: the bdecoded integer, and the offset to read next
36 @raise ValueError: if the data is improperly encoded
41 newf = x.index('e', f)
49 elif x[f] == '0' and newf != f+1:
53 def decode_string(x, f):
57 @param x: the data to decode
59 @param f: the offset in the data to start at
60 @rtype: C{string}, C{int}
61 @return: the bdecoded string, and the offset to read next
62 @raise ValueError: if the data is improperly encoded
66 colon = x.index(':', f)
69 except (OverflowError, ValueError):
71 if x[f] == '0' and colon != f+1:
74 return (x[colon:colon+n], colon+n)
def decode_unicode(x, f):
    """Bdecode a unicode string.

    The character at offset C{f} is skipped, the data after it is decoded
    as an ordinary bencoded string, and the resulting bytes are interpreted
    as UTF-8.

    @type x: C{string}
    @param x: the data to decode
    @type f: C{int}
    @param f: the offset in the data to start at
    @rtype: C{unicode}, C{int}
    @return: the bdecoded unicode string, and the offset to read next
    @raise ValueError: if the data is improperly encoded

    """

    # Step over the single character at f before reading the string.
    payload_start = f + 1
    raw, next_offset = decode_string(x, payload_start)
    text = raw.decode('UTF-8')
    return (text, next_offset)
91 def decode_list(x, f):
95 @param x: the data to decode
97 @param f: the offset in the data to start at
98 @rtype: C{list}, C{int}
99 @return: the bdecoded list, and the offset to read next
105 v, f = decode_func[x[f]](x, f)
109 def decode_dict(x, f):
110 """Bdecode a dictionary.
113 @param x: the data to decode
115 @param f: the offset in the data to start at
116 @rtype: C{dictionary}, C{int}
117 @return: the bdecoded dictionary, and the offset to read next
118 @raise ValueError: if the data is improperly encoded
125 k, f = decode_string(x, f)
129 r[k], f = decode_func[x[f]](x, f)
# Register the decoders, keyed by the first character of the encoded data.
decode_func.update({'l': decode_list, 'd': decode_dict, 'i': decode_int})
# A leading digit is handled by the string decoder.
decode_func.update(dict.fromkeys('0123456789', decode_string))
decode_func['u'] = decode_unicode
148 def bdecode(x, sloppy = 0):
149 """Bdecode a string of data.
152 @param x: the data to decode
153 @type sloppy: C{boolean}
154 @param sloppy: whether to allow errors in the decoding
156 @return: the bdecoded data
157 @raise ValueError: if the data is improperly encoded
162 r, l = decode_func[x[0]](x, 0)
163 # except (IndexError, KeyError):
164 except (IndexError, KeyError, ValueError):
165 raise ValueError, "bad bencoded data"
166 if not sloppy and l != len(x):
167 raise ValueError, "bad bencoded data"
170 bencached_marker = []
173 """Dummy data structure for storing bencoded data in memory.
175 @type marker: C{list}
176 @ivar marker: mutable type to make sure the data was encoded by this class
177 @type bencoded: C{string}
178 @ivar bencoded: the bencoded data stored in a string
182 def __init__(self, s):
186 @param s: the new bencoded data to store
190 self.marker = bencached_marker
193 BencachedType = type(Bencached('')) # insufficient, but good as a filter
195 def encode_bencached(x,r):
196 """Bencode L{Bencached} data.
198 @type x: L{Bencached}
199 @param x: the data to encode
201 @param r: the currently bencoded data, to which the bencoding of x
206 assert x.marker == bencached_marker
210 """Bencode an integer.
213 @param x: the data to encode
215 @param r: the currently bencoded data, to which the bencoding of x
220 r.extend(('i',str(x),'e'))
222 def encode_bool(x,r):
223 """Bencode a boolean.
226 @param x: the data to encode
228 @param r: the currently bencoded data, to which the bencoding of x
def encode_string(x,r):
    """Bencode a string.

    Three pieces are appended to C{r}: the length of C{x} in decimal,
    a colon, and C{x} itself.

    @type x: C{string}
    @param x: the data to encode
    @type r: C{list}
    @param r: the currently bencoded data, to which the bencoding of x
        will be appended

    """

    length_prefix = str(len(x))
    r.extend((length_prefix, ':', x))
def encode_unicode(x,r):
    """Bencode a unicode string.

    The text is converted to UTF-8 and then bencoded exactly like a
    plain string.

    @type x: C{unicode}
    @param x: the data to encode
    @type r: C{list}
    @param r: the currently bencoded data, to which the bencoding of x
        will be appended

    """

    utf8_data = x.encode('UTF-8')
    encode_string(utf8_data, r)
262 def encode_list(x,r):
266 @param x: the data to encode
268 @param r: the currently bencoded data, to which the bencoding of x
275 encode_func[type(e)](e, r)
278 def encode_dict(x,r):
279 """Bencode a dictionary.
281 @type x: C{dictionary}
282 @param x: the data to encode
284 @param r: the currently bencoded data, to which the bencoding of x
293 r.extend((str(len(k)),':',k))
294 encode_func[type(v)](v, r)
# Register the encoders, keyed by the type of the value being encoded.
encode_func.update({
    BencachedType: encode_bencached,
    IntType: encode_int,
    LongType: encode_int,
    StringType: encode_string,
    ListType: encode_list,
    TupleType: encode_list,
    DictType: encode_dict,
    BooleanType: encode_bool,
})
307 encode_func[UnicodeType] = encode_unicode
310 """Bencode some data.
313 @param x: the data to encode
315 @return: the bencoded data
316 @raise ValueError: if the data contains a type that cannot be encoded
321 encode_func[type(x)](x, r)
323 raise ValueError, "failed to bencode the data"
326 class TestBencode(unittest.TestCase):
327 """Test the bencoding and bdecoding of data."""
def test_bdecode_string(self):
    # Each entry pairs an encoded input with the value it must decode to,
    # or with ValueError when decoding must be rejected.
    checks = (
        ('0:0:', ValueError),
        ('', ValueError),
        ('35208734823ljdahflajhdf', ValueError),
        ('2:abfdjslhfld', ValueError),
        ('0:', ''),
        ('3:abc', 'abc'),
        ('10:1234567890', '1234567890'),
        ('02:xy', ValueError),
        ('9999:x', ValueError),
    )
    for encoded, outcome in checks:
        if outcome is ValueError:
            self.failUnlessRaises(ValueError, bdecode, encoded)
        else:
            self.failUnlessEqual(bdecode(encoded), outcome)
342 def test_bdecode_int(self):
343 self.failUnlessRaises(ValueError, bdecode, 'ie')
344 self.failUnlessRaises(ValueError, bdecode, 'i341foo382e')
345 self.failUnlessEqual(bdecode('i4e'), 4L)
346 self.failUnlessEqual(bdecode('i0e'), 0L)
347 self.failUnlessEqual(bdecode('i123456789e'), 123456789L)
348 self.failUnlessEqual(bdecode('i-10e'), -10L)
349 self.failUnlessRaises(ValueError, bdecode, 'i-0e')
350 self.failUnlessRaises(ValueError, bdecode, 'i123')
351 self.failUnlessRaises(ValueError, bdecode, 'i6easd')
352 self.failUnlessRaises(ValueError, bdecode, 'i03e')
def test_bdecode_list(self):
    # Each entry pairs an encoded input with the list it must decode to,
    # or with ValueError when decoding must be rejected.
    checks = (
        ('l', ValueError),
        ('le', []),
        ('leanfdldjfh', ValueError),
        ('l0:0:0:e', ['', '', '']),
        ('relwjhrlewjh', ValueError),
        ('li1ei2ei3ee', [1, 2, 3]),
        ('l3:asd2:xye', ['asd', 'xy']),
        ('ll5:Alice3:Bobeli2ei3eee', [['Alice', 'Bob'], [2, 3]]),
        ('l01:ae', ValueError),
        ('l0:', ValueError),
    )
    for encoded, outcome in checks:
        if outcome is ValueError:
            self.failUnlessRaises(ValueError, bdecode, encoded)
        else:
            self.failUnlessEqual(bdecode(encoded), outcome)
def test_bdecode_dict(self):
    # Each entry pairs an encoded input with the dictionary it must decode
    # to, or with ValueError when decoding must be rejected.
    checks = (
        ('d', ValueError),
        ('defoobar', ValueError),
        ('de', {}),
        ('d3:agei25e4:eyes4:bluee', {'age': 25, 'eyes': 'blue'}),
        ('d8:spam.mp3d6:author5:Alice6:lengthi100000eee',
         {'spam.mp3': {'author': 'Alice', 'length': 100000}}),
        ('d3:fooe', ValueError),
        ('di1e0:e', ValueError),
        ('d1:b0:1:a0:e', ValueError),
        ('d1:a0:1:a0:e', ValueError),
        ('d0:0:', ValueError),
        ('d0:', ValueError),
    )
    for encoded, outcome in checks:
        if outcome is ValueError:
            self.failUnlessRaises(ValueError, bdecode, encoded)
        else:
            self.failUnlessEqual(bdecode(encoded), outcome)
def test_bdecode_unicode(self):
    # Same cases as the plain-string test, with each input prefixed by 'u'.
    checks = (
        ('u0:0:', ValueError),
        ('u', ValueError),
        ('u35208734823ljdahflajhdf', ValueError),
        ('u2:abfdjslhfld', ValueError),
        ('u0:', ''),
        ('u3:abc', 'abc'),
        ('u10:1234567890', '1234567890'),
        ('u02:xy', ValueError),
        ('u9999:x', ValueError),
    )
    for encoded, outcome in checks:
        if outcome is ValueError:
            self.failUnlessRaises(ValueError, bdecode, encoded)
        else:
            self.failUnlessEqual(bdecode(encoded), outcome)
391 def test_bencode_int(self):
392 self.failUnlessEqual(bencode(4), 'i4e')
393 self.failUnlessEqual(bencode(0), 'i0e')
394 self.failUnlessEqual(bencode(-10), 'i-10e')
395 self.failUnlessEqual(bencode(12345678901234567890L), 'i12345678901234567890e')
def test_bencode_string(self):
    # (value, expected encoding) pairs, starting with the empty string.
    checks = (
        ('', '0:'),
        ('abc', '3:abc'),
        ('1234567890', '10:1234567890'),
    )
    for value, encoded in checks:
        self.failUnlessEqual(bencode(value), encoded)
def test_bencode_list(self):
    # (value, expected encoding) pairs: empty, flat and nested lists.
    checks = (
        ([], 'le'),
        ([1, 2, 3], 'li1ei2ei3ee'),
        ([['Alice', 'Bob'], [2, 3]], 'll5:Alice3:Bobeli2ei3eee'),
    )
    for value, encoded in checks:
        self.failUnlessEqual(bencode(value), encoded)
def test_bencode_dict(self):
    # (value, expected encoding) pairs: empty, flat and nested dictionaries.
    checks = (
        ({}, 'de'),
        ({'age': 25, 'eyes': 'blue'}, 'd3:agei25e4:eyes4:bluee'),
        ({'spam.mp3': {'author': 'Alice', 'length': 100000}},
         'd8:spam.mp3d6:author5:Alice6:lengthi100000eee'),
    )
    for value, encoded in checks:
        self.failUnlessEqual(bencode(value), encoded)
    # A dictionary with a non-string key must be refused.
    self.failUnlessRaises(ValueError, bencode, {1: 'foo'})
def test_bencode_unicode(self):
    # Unicode input is expected to produce the plain string encoding.
    checks = (
        (u'', '0:'),
        (u'abc', '3:abc'),
        (u'1234567890', '10:1234567890'),
    )
    for value, encoded in checks:
        self.failUnlessEqual(bencode(value), encoded)
420 self.failUnless(bdecode(bencode(True)))
421 self.failIf(bdecode(bencode(False)))
# With no unicode type available, attach a skip reason to both unicode
# tests. None is a singleton, so it is tested by identity, not equality.
if UnicodeType is None:
    test_bencode_unicode.skip = "Python was not compiled with unicode support"
    test_bdecode_unicode.skip = "Python was not compiled with unicode support"