From 2f3c17ab7b51cbad0abf11fa66f6cafcb5a41c5e Mon Sep 17 00:00:00 2001 From: Cameron Dale Date: Wed, 16 Apr 2008 10:40:56 -0700 Subject: [PATCH] Clean up the DHT config, making K and HASH_LENGTH constants instead. --- apt-p2p.conf | 6 ------ apt_p2p/apt_p2p_conf.py | 11 +---------- apt_p2p_Khashmir/DHT.py | 24 +++++++++--------------- apt_p2p_Khashmir/actions.py | 5 +++-- apt_p2p_Khashmir/khash.py | 21 +++++++++++++-------- apt_p2p_Khashmir/khashmir.py | 6 +++--- apt_p2p_Khashmir/krpc.py | 2 +- apt_p2p_Khashmir/ktable.py | 25 +++++++++++++++---------- apt_p2p_Khashmir/node.py | 2 +- apt_p2p_Khashmir/stats.py | 10 +++------- apt_p2p_Khashmir/util.py | 10 ++++++---- debian/apt-p2p.conf.sgml | 15 --------------- test.py | 9 --------- 13 files changed, 55 insertions(+), 91 deletions(-) diff --git a/apt-p2p.conf b/apt-p2p.conf index 6121547..500bd23 100644 --- a/apt-p2p.conf +++ b/apt-p2p.conf @@ -72,12 +72,6 @@ BOOTSTRAP = www.camrdale.org:9977 # whether this node is a bootstrap node BOOTSTRAP_NODE = no -# Kademlia "K" constant, this should be an even number -K = 8 - -# SHA1 is 160 bits long -HASH_LENGTH = 160 - # interval between saving the running state CHECKPOINT_INTERVAL = 5m diff --git a/apt_p2p/apt_p2p_conf.py b/apt_p2p/apt_p2p_conf.py index 69f6a9c..20ba412 100644 --- a/apt_p2p/apt_p2p_conf.py +++ b/apt_p2p/apt_p2p_conf.py @@ -58,9 +58,6 @@ DEFAULTS = { # for everybody to download 'OTHER_DIRS': """""", - # User name to try and run as - 'USERNAME': '', - # Whether it's OK to use an IP address from a known local/private range 'LOCAL_OK': 'no', @@ -89,12 +86,6 @@ DHT_DEFAULTS = { # whether this node is a bootstrap node 'BOOTSTRAP_NODE': "no", - # Kademlia "K" constant, this should be an even number - 'K': '8', - - # SHA1 is 160 bits long - 'HASH_LENGTH': '160', - # checkpoint every this many seconds 'CHECKPOINT_INTERVAL': '5m', # five minutes @@ -127,7 +118,7 @@ DHT_DEFAULTS = { 'KEY_EXPIRE': '1h', # 60 minutes # whether to spew info about the requests/responses in the protocol - 'SPEW': 'yes', + 'SPEW': 'no', } class AptP2PConfigParser(SafeConfigParser): diff --git a/apt_p2p_Khashmir/DHT.py b/apt_p2p_Khashmir/DHT.py index 1870841..1aaf7a2 100644 --- a/apt_p2p_Khashmir/DHT.py +++ b/apt_p2p_Khashmir/DHT.py @@ -16,6 +16,7 @@ from zope.interface import implements from apt_p2p.interfaces import IDHT, IDHTStats, IDHTStatsFactory from khashmir import Khashmir from bencode import bencode, bdecode +from khash import HASH_LENGTH try: from twisted.web2 import channel, server, resource, http, http_headers @@ -105,7 +106,7 @@ class DHT: self.bootstrap_node = self.config_parser.getboolean(section, 'BOOTSTRAP_NODE') for k in self.config_parser.options(section): # The numbers in the config file - if k in ['K', 'HASH_LENGTH', 'CONCURRENT_REQS', 'STORE_REDUNDANCY', + if k in ['CONCURRENT_REQS', 'STORE_REDUNDANCY', 'RETRIEVE_VALUES', 'MAX_FAILURES', 'PORT']: self.config[k] = self.config_parser.getint(section, k) # The times in the config file @@ -230,21 +231,14 @@ class DHT: self.joined = False self.khashmir.shutdown() - def _normKey(self, key, bits=None, bytes=None): + def _normKey(self, key): """Normalize the length of keys used in the DHT.""" - bits = self.config["HASH_LENGTH"] - if bits is not None: - bytes = (bits - 1) // 8 + 1 - else: - if bytes is None: - raise DHTError, "you must specify one of bits or bytes for normalization" - # Extend short keys with null bytes - if len(key) < bytes: - key = key + '\000'*(bytes - len(key)) + if len(key) < HASH_LENGTH: + key = key + '\000'*(HASH_LENGTH - len(key)) # Truncate long keys - elif len(key) > bytes: - key = key[:bytes] + elif len(key) > HASH_LENGTH: + key = key[:HASH_LENGTH] return key def getValue(self, key): @@ -337,7 +331,7 @@ class TestSimpleDHT(unittest.TestCase): """Simple 2-node unit tests for the DHT.""" timeout = 50 - DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, + DHT_DEFAULTS = {'PORT': 9977, 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, 'MAX_FAILURES': 3, @@ -457,7 +451,7 @@ class TestMultiDHT(unittest.TestCase): timeout = 80 num = 20 - DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, + DHT_DEFAULTS = {'PORT': 9977, 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, 'MAX_FAILURES': 3, diff --git a/apt_p2p_Khashmir/actions.py b/apt_p2p_Khashmir/actions.py index b546320..5254fe4 100644 --- a/apt_p2p_Khashmir/actions.py +++ b/apt_p2p_Khashmir/actions.py @@ -7,6 +7,7 @@ from twisted.internet import reactor, defer from twisted.python import log from khash import intify +from ktable import K from util import uncompact class ActionBase: @@ -218,7 +219,7 @@ class ActionBase: This implementation is suitable for a recurring search over all nodes. """ self.sortNodes() - return self.sorted_nodes[:self.config['K']] + return self.sorted_nodes[:K] def generateArgs(self, node): """Generate the arguments to the node's action. @@ -253,7 +254,7 @@ class FindNode(ActionBase): def generateResult(self): """Result is the K closest nodes to the target.""" self.sortNodes() - return (self.sorted_nodes[:self.config['K']], ) + return (self.sorted_nodes[:K], ) class FindValue(ActionBase): diff --git a/apt_p2p_Khashmir/khash.py b/apt_p2p_Khashmir/khash.py index 91db232..8df2cd9 100644 --- a/apt_p2p_Khashmir/khash.py +++ b/apt_p2p_Khashmir/khash.py @@ -1,16 +1,21 @@ ## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved # see LICENSE.txt for license information -"""Functions to deal with hashes (node IDs and keys).""" +"""Functions to deal with hashes (node IDs and keys). + +@var HASH_LENGTH: the length of the hash to use in bytes +""" from sha import sha from os import urandom from twisted.trial import unittest +HASH_LENGTH = 20 + def intify(hstr): """Convert a hash (big-endian) to a long python integer.""" - assert len(hstr) == 20 + assert len(hstr) == HASH_LENGTH return long(hstr.encode('hex'), 16) def stringify(num): @@ -21,7 +26,7 @@ def stringify(num): if len(str) % 2 != 0: str = '0' + str str = str.decode('hex') - return (20 - len(str)) *'\x00' + str + return (HASH_LENGTH - len(str)) *'\x00' + str def distance(a, b): """Calculate the distance between two hashes expressed as strings.""" @@ -30,7 +35,7 @@ def distance(a, b): def newID(): """Get a new pseudorandom globally unique hash string.""" h = sha() - h.update(urandom(20)) + h.update(urandom(HASH_LENGTH)) return h.digest() def newIDInRange(min, max): @@ -48,15 +53,15 @@ def newTID(): class TestNewID(unittest.TestCase): """Test the newID function.""" def testLength(self): - self.failUnlessEqual(len(newID()), 20) + self.failUnlessEqual(len(newID()), HASH_LENGTH) def testHundreds(self): for x in xrange(100): self.testLength class TestIntify(unittest.TestCase): """Test the intify function.""" - known = [('\0' * 20, 0), - ('\xff' * 20, 2L**160 - 1), + known = [('\0' * HASH_LENGTH, 0), + ('\xff' * HASH_LENGTH, 2L**(HASH_LENGTH*8) - 1), ] def testKnown(self): for str, value in self.known: @@ -74,7 +79,7 @@ class TestIntify(unittest.TestCase): class TestDisantance(unittest.TestCase): """Test the distance function.""" known = [ - (("\0" * 20, "\xff" * 20), 2**160L -1), + (("\0" * HASH_LENGTH, "\xff" * HASH_LENGTH), 2L**(HASH_LENGTH*8) -1), ((sha("foo").digest(), sha("foo").digest()), 0), ((sha("bar").digest(), sha("bar").digest()), 0) ] diff --git a/apt_p2p_Khashmir/khashmir.py b/apt_p2p_Khashmir/khashmir.py index 8860c67..c5b199a 100644 --- a/apt_p2p_Khashmir/khashmir.py +++ b/apt_p2p_Khashmir/khashmir.py @@ -80,7 +80,7 @@ class KhashmirBase(protocol.Factory): self.node = self._loadSelfNode('', self.port) self.table = KTable(self.node, config) self.token_secrets = [newID()] - self.stats = StatsLogger(self.table, self.store, self.config) + self.stats = StatsLogger(self.table, self.store) # Start listening self.udp = krpc.hostbroker(self, self.stats, config) @@ -514,7 +514,7 @@ class Khashmir(KhashmirWrite): class SimpleTests(unittest.TestCase): timeout = 10 - DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, + DHT_DEFAULTS = {'PORT': 9977, 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, 'MAX_FAILURES': 3, @@ -587,7 +587,7 @@ class MultiTest(unittest.TestCase): timeout = 30 num = 20 - DHT_DEFAULTS = {'PORT': 9977, 'K': 8, 'HASH_LENGTH': 160, + DHT_DEFAULTS = {'PORT': 9977, 'CHECKPOINT_INTERVAL': 300, 'CONCURRENT_REQS': 4, 'STORE_REDUNDANCY': 3, 'RETRIEVE_VALUES': -10000, 'MAX_FAILURES': 3, diff --git a/apt_p2p_Khashmir/krpc.py b/apt_p2p_Khashmir/krpc.py index 92d03a7..dadf0f2 100644 --- a/apt_p2p_Khashmir/krpc.py +++ b/apt_p2p_Khashmir/krpc.py @@ -582,7 +582,7 @@ class Receiver(protocol.Factory): def make(port): from stats import StatsLogger af = Receiver() - a = hostbroker(af, StatsLogger(None, None, {}), {'SPEW': False}) + a = hostbroker(af, StatsLogger(None, None), {'SPEW': False}) a.protocol = KRPC p = reactor.listenUDP(port, a) return af, a, p diff --git a/apt_p2p_Khashmir/ktable.py b/apt_p2p_Khashmir/ktable.py index 25a99ec..499c4d8 100644 --- a/apt_p2p_Khashmir/ktable.py +++ b/apt_p2p_Khashmir/ktable.py @@ -1,7 +1,10 @@ ## Copyright 2002-2003 Andrew Loewenstern, All Rights Reserved # see LICENSE.txt for license information -"""The routing table and buckets for a kademlia-like DHT.""" +"""The routing table and buckets for a kademlia-like DHT. + +@var K: the Kademlia "K" constant, this should be an even number +""" from datetime import datetime from bisect import bisect_left @@ -13,6 +16,8 @@ from twisted.trial import unittest import khash from node import Node, NULL_ID +K = 8 + class KTable: """Local routing table for a kademlia-like distributed hash table. @@ -36,7 +41,7 @@ class KTable: assert node.id != NULL_ID self.node = node self.config = config - self.buckets = [KBucket([], 0L, 2L**self.config['HASH_LENGTH'])] + self.buckets = [KBucket([], 0L, 2L**(khash.HASH_LENGTH*8))] def _bucketIndexForInt(self, num): """Find the index of the bucket that should hold the node's ID number.""" @@ -75,11 +80,11 @@ class KTable: nodes = list(self.buckets[i].l) # Make sure we have enough - if len(nodes) < self.config['K']: + if len(nodes) < K: # Look in adjoining buckets for nodes min = i - 1 max = i + 1 - while len(nodes) < self.config['K'] and (min >= 0 or max < len(self.buckets)): + while len(nodes) < K and (min >= 0 or max < len(self.buckets)): # Add the adjoining buckets' nodes to the list if min >= 0: nodes = nodes + self.buckets[min].l @@ -90,7 +95,7 @@ class KTable: # Sort the found nodes by proximity to the id and return the closest K nodes.sort(lambda a, b, num=num: cmp(num ^ a.num, num ^ b.num)) - return nodes[:self.config['K']] + return nodes[:K] def _splitBucket(self, a): """Split a bucket in two. @@ -129,7 +134,7 @@ class KTable: otherBucket = i+1 # Decide if we should do a merge - if otherBucket is not None and len(self.buckets[i].l) + len(self.buckets[otherBucket].l) <= self.config['K']: + if otherBucket is not None and len(self.buckets[i].l) + len(self.buckets[otherBucket].l) <= K: # Remove one bucket and set the other to cover its range as well b = self.buckets[i] a = self.buckets.pop(otherBucket) @@ -172,7 +177,7 @@ class KTable: removed = True # Insert the new node - if new and self._bucketIndexForInt(new.num) == i and len(self.buckets[i].l) < self.config['K']: + if new and self._bucketIndexForInt(new.num) == i and len(self.buckets[i].l) < K: self.buckets[i].l.append(new) elif removed: self._mergeBucket(i) @@ -219,7 +224,7 @@ class KTable: return # We don't have this node, check to see if the bucket is full - if len(self.buckets[i].l) < self.config['K']: + if len(self.buckets[i].l) < K: # Not full, append this node and return if contacted: node.updateLastSeen() @@ -233,7 +238,7 @@ class KTable: return self.buckets[i].l[0] # Make sure our table isn't FULL, this is really unlikely - if len(self.buckets) >= self.config['HASH_LENGTH']: + if len(self.buckets) >= (khash.HASH_LENGTH*8): log.err("Hash Table is FULL! Increase K!") return @@ -384,7 +389,7 @@ class TestKTable(unittest.TestCase): def setUp(self): self.a = Node(khash.newID(), '127.0.0.1', 2002) - self.t = KTable(self.a, {'HASH_LENGTH': 160, 'K': 8, 'MAX_FAILURES': 3}) + self.t = KTable(self.a, {'MAX_FAILURES': 3}) def testAddNode(self): self.b = Node(khash.newID(), '127.0.0.1', 2003) diff --git a/apt_p2p_Khashmir/node.py b/apt_p2p_Khashmir/node.py index c1300c7..a5f40eb 100644 --- a/apt_p2p_Khashmir/node.py +++ b/apt_p2p_Khashmir/node.py @@ -16,7 +16,7 @@ import khash from util import compact # magic id to use before we know a peer's id -NULL_ID = 20 * '\0' +NULL_ID = khash.HASH_LENGTH * '\0' class Node: """Encapsulate a node's contact info. diff --git a/apt_p2p_Khashmir/stats.py b/apt_p2p_Khashmir/stats.py index aee9bbb..126099c 100644 --- a/apt_p2p_Khashmir/stats.py +++ b/apt_p2p_Khashmir/stats.py @@ -4,13 +4,12 @@ from datetime import datetime, timedelta from StringIO import StringIO +from ktable import K from util import byte_format class StatsLogger: """Store the statistics for the Khashmir DHT. - @type config: C{dictionary} - @ivar config: the configuration parameters for the DHT @ivar startTime: the time the program was started @ivar reachable: whether we can be contacted by other nodes @type table: L{ktable.KTable} @@ -33,18 +32,15 @@ class StatsLogger: generated an error """ - def __init__(self, table, store, config): + def __init__(self, table, store): """Initialize the statistics. @type table: L{ktable.KTable} @param table: the routing table for the DHT @type store: L{db.DB} @param store: the database for the DHT - @type config: C{dictionary} - @param config: the configuration parameters for the DHT """ # General - self.config = config self.startTime = datetime.now().replace(microsecond=0) self.reachable = False @@ -77,7 +73,7 @@ class StatsLogger: if datetime.now() - self.lastTableUpdate > timedelta(seconds = 15): self.lastTableUpdate = datetime.now() self.nodes = reduce(lambda a, b: a + len(b.l), self.table.buckets, 0) - self.users = self.config['K'] * (2**(len(self.table.buckets) - 1)) + self.users = K * (2**(len(self.table.buckets) - 1)) return (self.nodes, self.users) def dbStats(self): diff --git a/apt_p2p_Khashmir/util.py b/apt_p2p_Khashmir/util.py index 55bd45a..626a860 100644 --- a/apt_p2p_Khashmir/util.py +++ b/apt_p2p_Khashmir/util.py @@ -5,6 +5,8 @@ from twisted.trial import unittest +from khash import HASH_LENGTH + def bucket_stats(l): """Given a list of khashmir instances, finds min, max, and average number of nodes in tables.""" max = avg = 0 @@ -35,11 +37,11 @@ def uncompact(s): @return: the node ID, IP address and port to contact the node on @raise ValueError: if the compact representation doesn't exist """ - if (len(s) != 26): + if (len(s) != HASH_LENGTH+6): raise ValueError - id = s[:20] - host = '.'.join([str(ord(i)) for i in s[20:24]]) - port = (ord(s[24]) << 8) | ord(s[25]) + id = s[:HASH_LENGTH] + host = '.'.join([str(ord(i)) for i in s[HASH_LENGTH:(HASH_LENGTH+4)]]) + port = (ord(s[HASH_LENGTH+4]) << 8) | ord(s[HASH_LENGTH+5]) return {'id': id, 'host': host, 'port': port} def compact(id, host, port): diff --git a/debian/apt-p2p.conf.sgml b/debian/apt-p2p.conf.sgml index 722de01..bbd1208 100644 --- a/debian/apt-p2p.conf.sgml +++ b/debian/apt-p2p.conf.sgml @@ -194,21 +194,6 @@ (Default is false) - - - - The number of the Kademlia "K" constant. - It should be an even number. - (Default is 8.) - - - - - - The number of bits in the hash to use. - (Default is 160.) - - diff --git a/test.py b/test.py index 3ccaa14..d4df4e7 100755 --- a/test.py +++ b/test.py @@ -357,9 +357,6 @@ CACHE_DIR = %(CACHE_DIR)s # for everybody to download # OTHER_DIRS = -# User name to try and run as -# USERNAME = - # Whether it's OK to use an IP addres from a known local/private range LOCAL_OK = yes @@ -387,12 +384,6 @@ BOOTSTRAP = %(BOOTSTRAP)s # whether this node is a bootstrap node BOOTSTRAP_NODE = %(BOOTSTRAP_NODE)s -# Kademlia "K" constant, this should be an even number -K = 8 - -# SHA1 is 160 bits long -HASH_LENGTH = 160 - # checkpoint every this many seconds CHECKPOINT_INTERVAL = 5m -- 2.39.2