2 A library for streaming and unstreaming of simple objects, designed
3 for speed, compactness, and ease of implementation.
5 The basic functions are bencode and bdecode. bencode takes an object
6 and returns a string, bdecode takes a string and returns an object.
7 bdecode raises a ValueError if you give it an invalid string.
9 The objects passed in may be nested dicts, lists, ints, strings,
10 and None. For example, all of the following may be bencoded -
12 {'a': [0, 1], 'b': None}
14 [None, ['a', 2, ['c', None]]]
18 {'name': 'Cronus', 'spouse': 'Rhea', 'children': ['Hades', 'Poseidon']}
20 In general bdecode(bencode(spam)) == spam, but tuples and lists are
21 encoded the same, so bdecode(bencode((0, 1))) is [0, 1] rather
22 than (0, 1). Longs and ints are also encoded the same way, so
23 bdecode(bencode(4)) is a long.
25 dict keys are required to be strings, to avoid a mess of potential
26 implementation incompatibilities. bencode is intended to be used
27 for protocols which are going to be re-implemented many times, so
28 it's very conservative in that regard.
Which type is encoded is determined by the first character: 'i', 'n',
'd', 'l', or any digit. These indicate integer, null, dict, list, and
string, respectively.
34 Strings are length-prefixed in base 10, followed by a colon.
36 bencode('spam') == '4:spam'
38 Nulls are indicated by a single 'n'.
Integers are written as an 'i', the value in base 10, and a terminating 'e'.
45 bencode(-20) == 'i-20e'
47 Lists are encoded in list order, terminated by an 'e' -
49 bencode(['abc', 'd']) == 'l3:abc1:de'
50 bencode([2, 'f']) == 'li2e1:fe'
52 Dicts are encoded by containing alternating keys and values,
53 with the keys in sorted order, terminated by an 'e'. For example -
55 bencode({'spam': 'eggs'}) == 'd4:spam4:eggse'
56 bencode({'ab': 2, 'a': None}) == 'd1:an2:abi2ee'
A string that is a prefix of another sorts first, so 'a' comes before 'abc'.
If a function is passed to bencode, it is called and its return value
is included as a raw string, for example -
bdecode(bencode(lambda: 'n')) == None
66 # This file is licensed under the GNU Lesser General Public License v2.1.
67 # originally written for Mojo Nation by Bryce Wilcox, Bram Cohen, and Greg P. Smith
68 # since then, almost completely rewritten by Bram Cohen
71 from cStringIO import StringIO
Encodes objects as strings; see the module documentation for more info.
80 return result.getvalue()
82 def bwrite(data, result):
83 encoder = encoders.get(type(data))
84 assert encoder is not None, 'unsupported data type: ' + `type(data)`
def encode_int(data, result):
    """Write the bencoded form of an integer to ``result``.

    The output is an 'i', the base-10 ``str()`` form of ``data``, and a
    closing 'e'; for example -20 is written as 'i-20e'.

    data -- the value to encode; it is rendered with ``str()``.
    result -- a file-like object; its ``write`` method is called once.
    """
    result.write('i' + str(data) + 'e')
92 encoders[IntType] = encode_int
93 encoders[LongType] = encode_int
95 def encode_list(data, result):
101 encoders[TupleType] = encode_list
102 encoders[ListType] = encode_list
def encode_string(data, result):
    """Write the bencoded form of a string to ``result``.

    The output is the length of ``data`` in base 10, a colon, and then
    ``data`` itself; for example 'spam' is written as '4:spam'.

    data -- the string to encode.
    result -- a file-like object; its ``write`` method is called once.
    """
    result.write(str(len(data)) + ':' + data)
107 encoders[StringType] = encode_string
109 def encode_dict(data, result):
114 assert type(key) is StringType, 'bencoded dictionary key must be a string'
116 bwrite(data[key], result)
119 encoders[DictType] = encode_dict
121 encoders[NoneType] = lambda data, result: result.write('n')
123 encoders[FunctionType] = lambda data, result: result.write(data())
124 encoders[MethodType] = encoders[FunctionType]
128 Does the opposite of bencode. Raises a ValueError if there's a problem.
131 result, index = bread(s, 0)
133 raise ValueError('left over stuff at end')
135 except IndexError, e:
136 raise ValueError(str(e))
138 raise ValueError(str(e))
141 return decoders[s[index]](s, index)
145 _bre = re.compile(r'(0|[1-9][0-9]*):')
147 def decode_raw_string(s, index):
148 x = _bre.match(s, index)
150 raise ValueError('invalid integer encoding')
151 endindex = x.end() + long(s[index:x.end() - 1])
152 if endindex > len(s):
153 raise ValueError('length encoding indicated premature end of string')
154 return s[x.end(): endindex], endindex
156 for c in '0123456789':
157 decoders[c] = decode_raw_string
159 _int_re = re.compile(r'i(0|-?[1-9][0-9]*)e')
161 def decode_int(s, index):
162 x = _int_re.match(s, index)
164 raise ValueError('invalid integer encoding')
165 return long(s[index + 1:x.end() - 1]), x.end()
167 decoders['i'] = decode_int
169 decoders['n'] = lambda s, index: (None, index + 1)
171 def decode_list(s, index):
174 while s[index] != 'e':
175 next, index = bread(s, index)
177 return result, index + 1
179 decoders['l'] = decode_list
181 def decode_dict(s, index):
185 while s[index] != 'e':
186 key, index = decode_raw_string(s, index)
188 raise ValueError("out of order keys")
190 value, index = bread(s, index)
192 return result, index + 1
194 decoders['d'] = decode_dict
196 def test_decode_raw_string():
197 assert decode_raw_string('1:a', 0) == ('a', 3)
198 assert decode_raw_string('0:', 0) == ('', 2)
199 assert decode_raw_string('10:aaaaaaaaaaaaaaaaaaaaaaaaa', 0) == ('aaaaaaaaaa', 13)
200 assert decode_raw_string('10:', 1) == ('', 3)
202 decode_raw_string('01:a', 0)
207 decode_raw_string('--1:a', 0)
212 decode_raw_string('h', 0)
217 decode_raw_string('h:', 0)
222 decode_raw_string('1', 0)
227 decode_raw_string('', 0)
232 decode_raw_string('5:a', 0)
237 def test_dict_enforces_order():
238 bdecode('d1:an1:bne')
240 bdecode('d1:bn1:ane')
245 def test_dict_forbids_non_string_key():
252 def test_dict_forbids_key_repeat():
254 bdecode('d1:an1:ane')
def test_empty_dict():
    """The bencoded string 'de' decodes to an empty dict."""
    assert bdecode('de') == {}
262 def test_ValueError_in_decode_unknown():
def test_encode_and_decode_none():
    """None survives an encode/decode round trip."""
    # Identity check: the decoded value must be None itself, not merely
    # something that compares equal to it.
    assert bdecode(bencode(None)) is None
272 def test_encode_and_decode_long():
273 assert bdecode(bencode(-23452422452342L)) == -23452422452342L
def test_encode_and_decode_int():
    """A small int survives an encode/decode round trip."""
    assert bdecode(bencode(2)) == 2
278 def test_decode_noncanonical_int():
300 def test_encode_and_decode_dict():
302 assert bdecode(bencode(x)) == x
def test_encode_and_decode_list():
    """An empty list survives an encode/decode round trip."""
    assert bdecode(bencode([])) == []
def test_encode_and_decode_tuple():
    """An empty tuple round-trips to an empty list.

    Tuples and lists share one encoder, so the decoder cannot tell them
    apart and always produces a list.
    """
    assert bdecode(bencode(())) == []
def test_encode_and_decode_empty_dict():
    """An empty dict survives an encode/decode round trip."""
    assert bdecode(bencode({})) == {}
def test_encode_and_decode_complex_object():
    """A nested mix of lists, dicts, integers, strings and None round-trips.

    Checks both that decoding returns an equal object and that
    re-encoding the decoded object reproduces the same string.
    """
    # The large literal is wider than a machine int, so it is a long even
    # without an explicit L suffix.
    spam = [[], 0, -3, -345234523543245234523, {}, 'spam', None, {'a': [3]}, {}]
    assert bencode(bdecode(bencode(spam))) == bencode(spam)
    assert bdecode(bencode(spam)) == spam
318 def test_unfinished_list():
325 def test_unfinished_dict():