1 # Written by Petru Paler, Uoti Urpala, Ross Cohen and John Hoffman
2 # Modified by Cameron Dale
3 # see LICENSE.txt for license information
5 # $Id: bencode.py 268 2007-08-18 23:45:45Z camrdale-guest $
7 """Functions for bencoding and bdecoding data.
9 @type logger: C{logging.Logger}
10 @var logger: the logger to send all log messages to for this module
11 @type decode_func: C{dictionary} of C{function}
12 @var decode_func: a dictionary of function calls to be made, based on data,
13 the keys are the first character of the data and the value is the
14 function to use to decode that data
15 @type bencached_marker: C{list}
16 @var bencached_marker: mutable marker object used to check that data was created by the L{Bencached} class
17 @type encode_func: C{dictionary} of C{function}
18 @var encode_func: a dictionary of function calls to be made, based on data,
19 the keys are the type of the data and the value is the
20 function to use to encode that data
21 @type BencachedType: C{type}
22 @var BencachedType: the L{Bencached} type
25 from types import IntType, LongType, StringType, ListType, TupleType, DictType
28 from types import BooleanType
32 from types import UnicodeType
35 from cStringIO import StringIO
37 logger = logging.getLogger('DebTorrent.bencode')
40 """Bdecode an integer.
43 @param x: the data to decode
45 @param f: the offset in the data to start at
46 @rtype: C{int}, C{int}
47 @return: the bdecoded integer, and the offset to read next
48 @raise ValueError: if the data is improperly encoded
53 newf = x.index('e', f)
61 elif x[f] == '0' and newf != f+1:
65 def decode_string(x, f):
69 @param x: the data to decode
71 @param f: the offset in the data to start at
72 @rtype: C{string}, C{int}
73 @return: the bdecoded string, and the offset to read next
74 @raise ValueError: if the data is improperly encoded
78 colon = x.index(':', f)
81 except (OverflowError, ValueError):
83 if x[f] == '0' and colon != f+1:
86 return (x[colon:colon+n], colon+n)
88 def decode_unicode(x, f):
89 """Bdecode a unicode string.
92 @param x: the data to decode
94 @param f: the offset in the data to start at
95 @rtype: C{unicode}, C{int}
96 @return: the bdecoded unicode string, and the offset to read next
100 s, f = decode_string(x, f+1)
101 return (s.decode('UTF-8'),f)
103 def decode_list(x, f):
107 @param x: the data to decode
109 @param f: the offset in the data to start at
110 @rtype: C{list}, C{int}
111 @return: the bdecoded list, and the offset to read next
117 v, f = decode_func[x[f]](x, f)
121 def decode_dict(x, f):
122 """Bdecode a dictionary.
125 @param x: the data to decode
127 @param f: the offset in the data to start at
128 @rtype: C{dictionary}, C{int}
129 @return: the bdecoded dictionary, and the offset to read next
130 @raise ValueError: if the data is improperly encoded
137 k, f = decode_string(x, f)
141 r[k], f = decode_func[x[f]](x, f)
# Register the decoders in the dispatch table. Each key is the first
# character of a bencoded value and the value is the function that decodes it:
# 'l' -> list, 'd' -> dictionary, 'i' -> integer.
145 decode_func['l'] = decode_list
146 decode_func['d'] = decode_dict
147 decode_func['i'] = decode_int
# Every decimal digit maps to decode_string, so any value whose first
# character is a digit is handed to the string decoder.
148 decode_func['0'] = decode_string
149 decode_func['1'] = decode_string
150 decode_func['2'] = decode_string
151 decode_func['3'] = decode_string
152 decode_func['4'] = decode_string
153 decode_func['5'] = decode_string
154 decode_func['6'] = decode_string
155 decode_func['7'] = decode_string
156 decode_func['8'] = decode_string
157 decode_func['9'] = decode_string
# The 'u' (unicode) decoder is deliberately left unregistered below.
158 #decode_func['u'] = decode_unicode
160 def bdecode(x, sloppy = 0):
161 """Bdecode a string of data.
164 @param x: the data to decode
165 @type sloppy: C{boolean}
166 @param sloppy: whether to allow errors in the decoding
168 @return: the bdecoded data
169 @raise ValueError: if the data is improperly encoded
174 r, l = decode_func[x[0]](x, 0)
175 # except (IndexError, KeyError):
176 except (IndexError, KeyError, ValueError):
177 logger.exception('bad bencoded data')
178 raise ValueError, "bad bencoded data"
179 if not sloppy and l != len(x):
180 raise ValueError, "bad bencoded data"
184 """A test routine for the bdecoding functions."""
196 bdecode('i341foo382e')
200 assert bdecode('i4e') == 4L
201 assert bdecode('i0e') == 0L
202 assert bdecode('i123456789e') == 123456789L
203 assert bdecode('i-10e') == -10L
225 bdecode('35208734823ljdahflajhdf')
230 bdecode('2:abfdjslhfld')
234 assert bdecode('0:') == ''
235 assert bdecode('3:abc') == 'abc'
236 assert bdecode('10:1234567890') == '1234567890'
247 assert bdecode('le') == []
249 bdecode('leanfdldjfh')
253 assert bdecode('l0:0:0:e') == ['', '', '']
255 bdecode('relwjhrlewjh')
259 assert bdecode('li1ei2ei3ee') == [1, 2, 3]
260 assert bdecode('l3:asd2:xye') == ['asd', 'xy']
261 assert bdecode('ll5:Alice3:Bobeli2ei3eee') == [['Alice', 'Bob'], [2, 3]]
272 assert bdecode('de') == {}
273 assert bdecode('d3:agei25e4:eyes4:bluee') == {'age': 25, 'eyes': 'blue'}
274 assert bdecode('d8:spam.mp3d6:author5:Alice6:lengthi100000eee') == {'spam.mp3': {'author': 'Alice', 'length': 100000}}
286 bdecode('d1:b0:1:a0:e')
291 bdecode('d1:a0:1:a0:e')
326 bencached_marker = []
329 """Dummy data structure for storing bencoded data in memory.
331 @type marker: C{list}
332 @ivar marker: mutable type to make sure the data was encoded by this class
333 @type bencoded: C{string}
334 @ivar bencoded: the bencoded data stored in a string
338 def __init__(self, s):
342 @param s: the new bencoded data to store
346 self.marker = bencached_marker
349 BencachedType = type(Bencached('')) # insufficient, but good as a filter
351 def encode_bencached(x,r):
352 """Bencode L{Bencached} data.
354 @type x: L{Bencached}
355 @param x: the data to encode
357 @param r: the currently bencoded data, to which the bencoding of x
362 assert x.marker == bencached_marker
366 """Bencode an integer.
369 @param x: the data to encode
371 @param r: the currently bencoded data, to which the bencoding of x
376 r.extend(('i',str(x),'e'))
378 def encode_bool(x,r):
379 """Bencode a boolean.
382 @param x: the data to encode
384 @param r: the currently bencoded data, to which the bencoding of x
391 def encode_string(x,r):
395 @param x: the data to encode
397 @param r: the currently bencoded data, to which the bencoding of x
402 r.extend((str(len(x)),':',x))
404 def encode_unicode(x,r):
405 """Bencode a unicode string.
408 @param x: the data to encode
410 @param r: the currently bencoded data, to which the bencoding of x
416 encode_string(x.encode('UTF-8'),r)
418 def encode_list(x,r):
422 @param x: the data to encode
424 @param r: the currently bencoded data, to which the bencoding of x
431 encode_func[type(e)](e, r)
434 def encode_dict(x,r):
435 """Bencode a dictionary.
437 @type x: C{dictionary}
438 @param x: the data to encode
440 @param r: the currently bencoded data, to which the bencoding of x
449 r.extend((str(len(k)),':',k))
450 encode_func[type(v)](v, r)
# Register the encoders in the dispatch table. Each key is the type of the
# value being encoded and the value is the function that bencodes it.
454 encode_func[BencachedType] = encode_bencached
# Plain and long integers share one encoder.
455 encode_func[IntType] = encode_int
456 encode_func[LongType] = encode_int
457 encode_func[StringType] = encode_string
# Lists and tuples share one encoder, so a tuple is bencoded as a list.
458 encode_func[ListType] = encode_list
459 encode_func[TupleType] = encode_list
460 encode_func[DictType] = encode_dict
462 encode_func[BooleanType] = encode_bool
464 encode_func[UnicodeType] = encode_unicode
467 """Bencode some data.
470 @param x: the data to encode
472 @return: the bencoded data
473 @raise ValueError: if the data contains a type that cannot be encoded
478 encode_func[type(x)](x, r)
480 logger.exception('could not encode type '+str(type(x))+' (value: '+str(x)+')')
485 """A test routine for the bencoding functions."""
486 assert bencode(4) == 'i4e'
487 assert bencode(0) == 'i0e'
488 assert bencode(-10) == 'i-10e'
489 assert bencode(12345678901234567890L) == 'i12345678901234567890e'
490 assert bencode('') == '0:'
491 assert bencode('abc') == '3:abc'
492 assert bencode('1234567890') == '10:1234567890'
493 assert bencode([]) == 'le'
494 assert bencode([1, 2, 3]) == 'li1ei2ei3ee'
495 assert bencode([['Alice', 'Bob'], [2, 3]]) == 'll5:Alice3:Bobeli2ei3eee'
496 assert bencode({}) == 'de'
497 assert bencode({'age': 25, 'eyes': 'blue'}) == 'd3:agei25e4:eyes4:bluee'
498 assert bencode({'spam.mp3': {'author': 'Alice', 'length': 100000}}) == 'd8:spam.mp3d6:author5:Alice6:lengthi100000eee'
502 except AssertionError: